blob: 03c21e5946301bedf94b6f79e5f61065c8d047fa [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Decoded form of a received packet header. */
struct packet_info {
	enum drbd_packet cmd;	/* command code taken from the wire header */
	int size;		/* payload size as announced in the header */
	int vnr;		/* presumably the volume number addressed — confirm at fill site */
};
56
/* Return values of drbd_may_finish_epoch().
 * NOTE(review): value semantics inferred from the names — confirm
 * against drbd_may_finish_epoch() itself. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch not finished, still in use */
	FE_DESTROYED,	/* epoch object was freed */
	FE_RECYCLED,	/* epoch object was reused */
};
62
/* Discriminator: does an operation target a single device (mdev)
 * or the whole connection (conn)? */
enum mdev_or_conn {
	MDEV,
	CONN,
};
67
Philipp Reisner65d11ed2011-02-07 17:35:59 +010068static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010069static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010070static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
72static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010073static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070074
Philipp Reisnerb411b362009-09-25 16:07:19 -070075
76#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
77
Lars Ellenberg45bb9122010-05-14 17:10:48 +020078/*
79 * some helper functions to deal with single linked page lists,
80 * page->private being our "next" pointer.
81 */
82
83/* If at least n pages are linked at head, get n pages off.
84 * Otherwise, don't modify head, and return NULL.
85 * Locking is the responsibility of the caller.
86 */
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller. */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* walk forward until we either hold n pages (break) or run out
	 * of chain (return NULL without touching *head) */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head:
	 * tmp is the successor of the last page handed out */
	page = *head;
	*head = tmp;
	return page;
}
117
118/* may be used outside of locks to find the tail of a (usually short)
119 * "private" page chain, before adding it back to a global chain head
120 * with page_chain_add() under a spinlock. */
/* Walk a "private" page chain (page->private is the "next" pointer)
 * to its last element.  If @len is non-NULL, the number of chain
 * elements is stored there.  May be used outside of locks on a short
 * private chain before splicing it back with page_chain_add(). */
static struct page *page_chain_tail(struct page *page, int *len)
{
	int count = 1;
	struct page *next;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
131
132static int page_chain_free(struct page *page)
133{
134 struct page *tmp;
135 int i = 0;
136 page_chain_for_each_safe(page, tmp) {
137 put_page(page);
138 ++i;
139 }
140 return i;
141}
142
/* Splice the private chain [chain_first .. chain_last] onto the front
 * of the chain at *head.  Caller must hold the appropriate lock.
 * chain_last must really be the tail of chain_first's chain (sanity
 * checked below). */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* cheap debug check: verify the claimed tail is the real tail */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
156
/* Try to hand out a chain of @number pages: first from the global
 * drbd_pp_pool, else freshly allocated.  All-or-nothing: on partial
 * allocation the partial chain is given back to the pool and NULL is
 * returned.  Does NOT account against mdev->pp_in_use — the caller
 * (drbd_pp_alloc) does that. */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link the new page in front of the partial chain */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
201
/* Move the leading run of finished entries of mdev->net_ee onto
 * @to_be_freed.  Caller must hold the req_lock (all callers in this
 * file take mdev->tconn->req_lock around this). */
static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
219
/* Collect finished net_ee entries under the req_lock, then free them
 * outside the lock (drbd_free_net_ee may give pages back and sleep-free
 * paths must not run under a spinlock). */
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}
232
233/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700235 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200236 * @number: number of pages requested
237 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700238 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Tries to allocate number pages, first from our own page pool, then from
240 * the kernel, unless this allocation would exceed the max_buffers setting.
241 * Possibly retry until DRBD frees sufficient pages somewhere else.
242 *
243 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700244 */
/* See kerneldoc above: returns a chain of @number pages linked via
 * page->private, or NULL.  Sleeps interruptibly on drbd_pp_wait until
 * pages become available, unless @retry is false or a signal arrives.
 * On success, accounts all @number pages against mdev->pp_in_use. */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* recycle pages from finished "net" peer requests first */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		/* wait until drbd_pp_free() wakes us */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
282
283/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100284 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200285 * Either links the page chain back to the global pool,
286 * or returns all pages to the system. */
/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system.
 * @is_net selects which usage counter the pages were accounted on. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	/* keep the global pool bounded; beyond the watermark, give the
	 * pages back to the system instead */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* someone may be waiting in drbd_pp_alloc() */
	wake_up(&drbd_pp_wait);
}
308
309/*
310You need to hold the req_lock:
311 _drbd_wait_ee_list_empty()
312
313You must not have the req_lock:
314 drbd_free_ee()
315 drbd_alloc_ee()
316 drbd_init_ee()
317 drbd_release_ee()
318 drbd_ee_fix_bhs()
319 drbd_process_done_ee()
320 drbd_clear_done_ee()
321 drbd_wait_ee_list_empty()
322*/
323
/* Allocate a peer request ("EE") plus a page chain big enough for
 * @data_size bytes of payload.  Returns NULL on allocation failure or
 * injected fault.  The interval @i is initialized but not inserted
 * into any tree.  Ownership passes to the caller (freed via
 * drbd_free_ee / drbd_free_net_ee). */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	/* round payload size up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	/* only retry/block in the page allocator if the caller may sleep */
	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
369
/* Free a peer request: its digest (if any), its page chain (accounted
 * against pp_in_use or pp_in_use_by_net depending on @is_net), and the
 * request itself.  Must not still have bios in flight or be linked
 * into an interval tree (asserted). */
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
380
/* Grab all entries of @list under the req_lock and free them outside
 * it.  Returns the number of entries freed.  Whether they are "net"
 * accounted is derived from the list identity. */
int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}
398
399
Philipp Reisner32862ec2011-02-08 16:41:01 +0100400/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700401 * and receive_Barrier.
402 *
403 * Move entries from net_ee to done_ee, if ready.
404 * Grab done_ee, call all callbacks, free the entries.
405 * The callbacks typically send out ACKs.
406 */
/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 * Returns 0, or the first non-zero callback result; once an error is
 * seen, subsequent callbacks are invoked with "cancel" != 0. */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* remember the first failure */
		drbd_free_ee(mdev, peer_req);
	}
	/* wake up waiters in drbd_wait_ee_list_empty() */
	wake_up(&mdev->ee_wait);

	return err;
}
439
/* Wait (uninterruptibly) until @head is empty.
 * Caller must hold mdev->tconn->req_lock; the lock is dropped around
 * the schedule and re-taken before returning. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
454
/* Locked wrapper around _drbd_wait_ee_list_empty(): takes the
 * req_lock for the caller. */
void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
461
462/* see also kernel_accept; which is only present since 2.6.18.
463 * also we want to log which part of it failed, exactly */
/* Open-coded kernel_accept() (see comment above): listen on @sock,
 * create a lite socket and accept one connection into *newsock.
 * On any failure, *what names the failing step for the caller's log
 * message, and a negative errno is returned. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	/* the lite socket has no ops yet; inherit them */
	(*newsock)->ops = sock->ops;

out:
	return err;
}
492
/* Single sock_recvmsg() with a kernel-space buffer.  If @flags is 0,
 * MSG_WAITALL | MSG_NOSIGNAL is used.  Returns bytes received or a
 * negative errno; may return less than @size. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* buffer is in kernel space, temporarily lift the usercopy check */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
514
/* Receive exactly @size bytes on the connection's data socket.
 * Logs the reason on short reads and, if fewer than @size bytes were
 * received, forces the connection state to C_BROKEN_PIPE.
 * Returns the sock_recvmsg() result (bytes read, 0 on orderly
 * shutdown, or negative errno). */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	/* despite the loop syntax, every path below breaks out on the
	 * first iteration unless a full message arrived */
	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}
567
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200568/* quoting tcp(7):
569 * On individual connections, the socket buffer size must be set prior to the
570 * listen(2) or connect(2) calls in order to have it take effect.
571 * This is our wrapper to do so.
572 */
/* Set socket send/receive buffer sizes (0 means: leave unchanged).
 * See the tcp(7) quote above: must be called before listen/connect.
 * The LOCK bits prevent autotuning from overriding our values. */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
		unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
586
/* Actively connect to the peer configured in tconn->net_conf.
 * Returns the connected socket, or NULL.  Transient errors (timeout,
 * peer not yet up, ...) are expected during the connect handshake and
 * do NOT force disconnect; other errors do. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo =  tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	/* src_in6 is big enough for both IPv4 and IPv6 addresses */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
664
/* Passive side of connection setup: bind/listen on our configured
 * address and wait (with randomly jittered timeout) for the peer to
 * connect.  Returns the established socket or NULL. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* accept timing out (EAGAIN) is the normal "peer not yet
		 * there" case — only real errors force disconnect */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
713
/* Send a bare "first packet" header with command @cmd on @sock.
 * Returns nonzero on success (inverts _conn_send_cmd's error return). */
static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return !_conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}
720
/* Receive a "first packet" header and return its command, or 0xffff
 * if a full header with the DRBD magic did not arrive. */
static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}
733
734/**
735 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736 * @sock: pointer to the pointer to the socket.
737 */
/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock: pointer to the pointer to the socket.
 *
 * Peeks at the socket without consuming data; a positive read or
 * EAGAIN means the connection is alive.  Anything else releases the
 * socket and NULLs *sock.  Returns true iff the socket is usable.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100756/* Gets called if a connection is established, or if a new minor gets created
757 in a connection */
758int drbd_connected(int vnr, void *p, void *data)
Philipp Reisner907599e2011-02-08 11:25:37 +0100759{
760 struct drbd_conf *mdev = (struct drbd_conf *)p;
761 int ok = 1;
762
763 atomic_set(&mdev->packet_seq, 0);
764 mdev->peer_seq = 0;
765
Philipp Reisner8410da82011-02-11 20:11:10 +0100766 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
767 &mdev->tconn->cstate_mutex :
768 &mdev->own_state_mutex;
769
Andreas Gruenbacher103ea272011-03-16 00:43:02 +0100770 ok &= !drbd_send_sync_param(mdev);
Andreas Gruenbacherf02d4d02011-03-16 01:12:50 +0100771 ok &= !drbd_send_sizes(mdev, 0, 0);
Andreas Gruenbacher2ae5f952011-03-16 01:07:20 +0100772 ok &= !drbd_send_uuids(mdev);
Andreas Gruenbacher927036f2011-03-16 00:50:00 +0100773 ok &= !drbd_send_state(mdev);
Philipp Reisner907599e2011-02-08 11:25:37 +0100774 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
775 clear_bit(RESIZE_PENDING, &mdev->flags);
776
Philipp Reisner8410da82011-02-11 20:11:10 +0100777
Philipp Reisner907599e2011-02-08 11:25:37 +0100778 return !ok;
779}
780
/*
 * Establish both DRBD sockets (data + meta) with the peer, run the
 * handshake and optional authentication, and start the asender thread.
 *
 * return values:
 * 1 yes, we have a valid connection
 * 0 oops, did not work out, please try again
 * -1 peer talks different language,
 * no point in trying again, please go standalone.
 * -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock = NULL;
	msock = NULL;

	/* Alternate between actively connecting and waiting for the peer to
	 * connect to us, until both sockets (data and meta) are established. */
	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			/* First outgoing connect becomes the data socket,
			 * second the meta socket; a third is a logic error. */
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* Give the peer a moment, then verify both sockets
			 * still look alive before declaring success. */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		/* Also accept incoming connections from the peer; the first
		 * packet tells us which role (S or M) the socket plays. */
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* randomize back-off so both nodes don't
				 * livelock retrying in lock-step */
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	/* data socket carries bulk traffic, meta socket the latency-sensitive
	 * acks/pings — hence the different TC priorities. */
	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	/* handshake done: restore the normal timeouts on the data socket */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	/* per-volume post-connect setup; returns 1 only if every volume
	 * succeeded (idr_for_each returns nonzero on first failure) */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
950
Philipp Reisnerce243852011-02-07 17:27:47 +0100951static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100953 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100954 pi->cmd = be16_to_cpu(h->h80.command);
955 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100956 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100957 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100958 pi->cmd = be16_to_cpu(h->h95.command);
959 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
960 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200961 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100962 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200963 be32_to_cpu(h->h80.magic),
964 be16_to_cpu(h->h80.command),
965 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100966 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700967 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100968 return true;
969}
970
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100971static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100972{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100973 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100974 int r;
975
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100976 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100977 if (unlikely(r != sizeof(*h))) {
978 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100979 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100980 return false;
981 }
982
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100983 r = decode_header(tconn, h, pi);
984 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700985
Philipp Reisner257d0af2011-01-26 12:15:29 +0100986 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987}
988
Philipp Reisner2451fc32010-08-24 13:43:11 +0200989static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700990{
991 int rv;
992
993 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400994 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200995 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996 if (rv) {
997 dev_err(DEV, "local disk flush failed with status %d\n", rv);
998 /* would rather check on EOPNOTSUPP, but that is not reliable.
999 * don't try again for ANY return value != 0
1000 * if (rv == -EOPNOTSUPP) */
1001 drbd_bump_write_ordering(mdev, WO_drain_io);
1002 }
1003 put_ldev(mdev);
1004 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001005}
1006
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev: DRBD device.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 *
 * An epoch is finished once its size is nonzero, no requests are active,
 * and its barrier number has arrived.  Finishing the head epoch may make
 * the next one in the list eligible, hence the loop.  The EV_CLEANUP flag
 * suppresses the barrier ack (teardown path).
 *
 * Returns FE_STILL_LIVE, FE_DESTROYED, or FE_RECYCLED (first finish wins).
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier, not an event */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* finish condition: non-empty, idle, and barrier number known */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending on the network */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* not the newest epoch: unlink and free it,
				 * then continue with its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* newest epoch: reset in place for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1078
1079/**
1080 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1081 * @mdev: DRBD device.
1082 * @wo: Write ordering method to try.
1083 */
1084void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1085{
1086 enum write_ordering_e pwo;
1087 static char *write_ordering_str[] = {
1088 [WO_none] = "none",
1089 [WO_drain_io] = "drain",
1090 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 };
1092
1093 pwo = mdev->write_ordering;
1094 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001095 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1096 wo = WO_drain_io;
1097 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1098 wo = WO_none;
1099 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001100 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001101 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1102}
1103
/**
 * drbd_submit_peer_request()
 * @mdev: DRBD device.
 * @peer_req: peer request
 * @rw: flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly-linked (bi_next) chain of built bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to map into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submitted together once all pages fit */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* current bio is full: start another one for the
			 * remaining pages */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	/* completion handler needs to know how many bios to wait for */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* nothing was submitted yet; drop every bio we built */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1200
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001201static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001202 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001203{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001204 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001205
1206 drbd_remove_interval(&mdev->write_requests, i);
1207 drbd_clear_interval(i);
1208
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001209 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001210 if (i->waiting)
1211 wake_up(&mdev->misc_wait);
1212}
1213
/* Handle a P_BARRIER packet: close the current write epoch and open a new
 * one, using a strategy that depends on the configured write ordering.
 * Returns true on success, false on an unexpected write_ordering value. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	/* the barrier ack sent later balances this */
	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* drain all in-flight writes, then flush the backing device */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* no new epoch needed (or allocatable): wait for the current
		 * one to empty and reuse it */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* we allocated a fresh epoch above; initialize and install it */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1285
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 *
 * Receive one data block of @data_size bytes (preceded by an integrity
 * digest when one is negotiated) from the data socket into a freshly
 * allocated peer request.  Returns the peer request, or NULL on short
 * read, invalid size, out-of-range sector, allocation failure, or digest
 * mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* digest size is 0 unless integrity checking was negotiated (>= proto 87) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	/* the digest was counted in data_size on the wire */
	data_size -= dgs;

	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* receive the payload page by page into the peer request's page chain */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
					rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		/* recompute the digest over what we received and compare */
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1373
1374/* drbd_drain_block() just takes a data block
1375 * out of the socket input buffer, and discards it.
1376 */
1377static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1378{
1379 struct page *page;
1380 int rr, rv = 1;
1381 void *data;
1382
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001383 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001384 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001385
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001386 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387
1388 data = kmap(page);
1389 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001390 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1392 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001393 if (!signal_pending(current))
1394 dev_warn(DEV,
1395 "short read receiving data: read %d expected %d\n",
1396 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001397 break;
1398 }
1399 data_size -= rr;
1400 }
1401 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001402 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001403 return rv;
1404}
1405
/* Receive a disk-less read reply directly into the pages of the original
 * request's master bio (optionally verifying an integrity digest first).
 * Returns 1 on success, 0 on short read or digest mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest size is 0 unless integrity checking was negotiated (>= proto 87) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	/* the digest was counted in data_size on the wire */
	data_size -= dgs;

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* fill the bio's segments in order straight from the socket */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			       kmap(bvec->bv_page)+bvec->bv_offset,
			       expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		/* recompute the digest over the received bio and compare */
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	D_ASSERT(data_size == 0);
	return 1;
}
1465
1466/* e_end_resync_block() is called via
1467 * drbd_process_done_ee() by asender only */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001468static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001470 struct drbd_peer_request *peer_req =
1471 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001472 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001473 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001474 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001476 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001478 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1479 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001480 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481 } else {
1482 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001483 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001485 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486 }
1487 dec_unacked(mdev);
1488
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001489 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490}
1491
/*
 * Receive a resync data block from the wire and submit it as a local
 * WRITE.  The caller already holds a local-disk reference (get_ldev);
 * on the success path that reference is released later in
 * drbd_peer_request_endio — hence __releases(local) — while all failure
 * paths release it here via put_ldev().
 *
 * Returns true if the write was submitted, false to trigger re-connect.
 */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* make the request visible on sync_ee before submitting it */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* account resync sectors for the resync throttling heuristics */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* undo the list_add above before freeing the request */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1527
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001528static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001529find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1530 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001531{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001532 struct drbd_request *req;
1533
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001534 /* Request object according to our peer */
1535 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001536 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001537 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001538 if (!missing_ok) {
1539 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1540 (unsigned long)id, (unsigned long long)sector);
1541 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001542 return NULL;
1543}
1544
/*
 * Handle a data reply for a diskless read: look up the originating
 * request via the echoed block_id and stream the payload into its bio.
 * Returns true on success, false to trigger re-connect.
 */
static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct drbd_request *req;
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);

	/* validate the peer-echoed request pointer under the request lock */
	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (unlikely(!req))
		return false;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	ok = recv_dless_read(mdev, req, sector, data_size);

	if (ok)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return ok;
}
1574
/*
 * Handle a resync data reply (block_id == ID_SYNCER).  If we have a
 * local disk, write the block via recv_resync_read(); otherwise drain
 * the payload from the socket and negatively ack it.
 */
static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(p->block_id == ID_SYNCER);

	if (get_ldev(mdev)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		ok = recv_resync_read(mdev, sector, data_size);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not write resync data to local disk.\n");

		/* must still consume the payload to keep the stream in sync */
		ok = drbd_drain_block(mdev, data_size);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
	}

	/* resync flow-control accounting (sectors received) */
	atomic_add(data_size >> 9, &mdev->rs_sect_in);

	return ok;
}
1603
/*
 * Worker callback: resubmit a write request that was postponed due to a
 * write conflict.  The request must still carry RQ_POSTPONED; its
 * master_bio was deliberately not completed, so we can feed it back
 * into __drbd_make_request.  Returns 0 on success, -EIO if the request
 * was not in the expected state.
 */
static int w_restart_write(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;
	struct bio *bio;
	unsigned long start_time;
	unsigned long flags;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	if (!expect(req->rq_state & RQ_POSTPONED)) {
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
		return -EIO;
	}
	/* snapshot what we need before the request is torn down below */
	bio = req->master_bio;
	start_time = req->start_time;
	/* Postponed requests will not have their master_bio completed! */
	__req_mod(req, DISCARD_WRITE, NULL);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	/* resubmit; __drbd_make_request returning nonzero means "try again" */
	while (__drbd_make_request(mdev, bio, start_time))
		/* retry */ ;
	return 0;
}
1627
/*
 * Queue w_restart_write for every postponed local write overlapping
 * [sector, sector+size) now that the conflicting peer write finished.
 * Called from e_end_block() with mdev->tconn->req_lock held.
 */
static void restart_conflicting_writes(struct drbd_conf *mdev,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		/* peer (non-local) intervals are not ours to restart */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		/* only restart requests that are postponed and no longer
		 * pending local I/O */
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		if (expect(list_empty(&req->w.list))) {
			req->w.mdev = mdev;
			req->w.cb = w_restart_write;
			drbd_queue_work(&mdev->tconn->data.work, &req->w);
		}
	}
}
1648
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 *
 * Completion of a mirrored write received from the peer: send the
 * protocol-C ack (positive or negative), clean up conflict-detection
 * state, restart postponed conflicting writes, and drop our epoch
 * reference.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	/* only protocol C acks individual writes */
	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* during resync, a successful write may double as a
			 * resync ack and mark the range in sync */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(mdev, sector, peer_req->i.size);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1692
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001693static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001694{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001695 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001696 struct drbd_peer_request *peer_req =
1697 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001698 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001700 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701 dec_unacked(mdev);
1702
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001703 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001704}
1705
/* Ack a fully-contained conflicting peer write as "discarded";
 * asender callback queued by handle_write_conflicts(). */
static int e_send_discard_write(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_DISCARD_WRITE);
}
1710
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001711static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001712{
1713 struct drbd_tconn *tconn = w->mdev->tconn;
1714
1715 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1716 P_RETRY_WRITE : P_DISCARD_WRITE);
1717}
1718
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001719static bool seq_greater(u32 a, u32 b)
1720{
1721 /*
1722 * We assume 32-bit wrap-around here.
1723 * For 24-bit wrap-around, we would have to shift:
1724 * a <<= 8; b <<= 8;
1725 */
1726 return (s32)a - (s32)b > 0;
1727}
1728
1729static u32 seq_max(u32 a, u32 b)
1730{
1731 return seq_greater(a, b) ? a : b;
1732}
1733
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001734static bool need_peer_seq(struct drbd_conf *mdev)
1735{
1736 struct drbd_tconn *tconn = mdev->tconn;
1737
1738 /*
1739 * We only need to keep track of the last packet_seq number of our peer
1740 * if we are in dual-primary mode and we have the discard flag set; see
1741 * handle_write_conflicts().
1742 */
1743 return tconn->net_conf->two_primaries &&
1744 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1745}
1746
/*
 * Record the peer's newest packet sequence number (only when conflict
 * resolution needs it, see need_peer_seq()) and wake anyone sleeping in
 * wait_for_and_update_peer_seq().
 */
static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
{
	unsigned int newest_peer_seq;

	if (need_peer_seq(mdev)) {
		spin_lock(&mdev->peer_seq_lock);
		newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
		mdev->peer_seq = newest_peer_seq;
		spin_unlock(&mdev->peer_seq_lock);
		/* wake up only if we actually changed mdev->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&mdev->seq_wait);
	}
}
1761
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
{
	DEFINE_WAIT(wait);
	long timeout;
	int ret;

	/* nothing to synchronize unless dual-primary with discard flag */
	if (!need_peer_seq(mdev))
		return 0;

	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		/* peer_seq - 1 <= mdev->peer_seq: we are the logically next
		 * packet; record the new maximum and proceed */
		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
			ret = 0;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* sleep until update_peer_seq() advances mdev->peer_seq,
		 * dropping peer_seq_lock while we wait */
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&mdev->peer_seq_lock);
		timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
		timeout = schedule_timeout(timeout);
		spin_lock(&mdev->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&mdev->peer_seq_lock);
	finish_wait(&mdev->seq_wait, &wait);
	return ret;
}
1818
Lars Ellenberg688593c2010-11-17 22:25:03 +01001819/* see also bio_flags_to_wire()
1820 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1821 * flags and back. We may replicate to other kernel versions. */
1822static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001823{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001824 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1825 (dpf & DP_FUA ? REQ_FUA : 0) |
1826 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1827 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001828}
1829
/*
 * Negatively complete every postponed local write overlapping
 * [sector, sector+size); used when conflict resolution is aborted.
 *
 * Called with mdev->tconn->req_lock held.  The lock is dropped around
 * complete_master_bio() and re-taken, so the overlap scan is restarted
 * from scratch after every completion.
 */
static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* completing the master bio may sleep / recurse; do it
		 * without holding the request lock */
		spin_unlock_irq(&mdev->tconn->req_lock);
		if (m.bio)
			complete_master_bio(mdev, &m);
		spin_lock_irq(&mdev->tconn->req_lock);
		goto repeat;
	}
}
1854
/*
 * Resolve overlaps between the incoming peer write @peer_req and the
 * requests already tracked in mdev->write_requests.
 *
 * Called from receive_Data() with mdev->tconn->req_lock held; the calls
 * to drbd_wait_misc() presumably sleep and drop that lock — the scan is
 * restarted afterwards (TODO confirm against drbd_wait_misc()).
 *
 * Returns 0 if the peer request may be submitted (it stays inserted in
 * the tree), -ENOENT if it was discarded or deferred (an ack work item
 * is already queued for the asender), or another negative error to
 * trigger a disconnect; on error the interval is removed again.
 */
static int handle_write_conflicts(struct drbd_conf *mdev,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_tconn *tconn = mdev->tconn;
	/* the DISCARD_CONCURRENT node is the one that decides conflicts */
	bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&mdev->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(mdev, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be discarded; otherwise,
			 * it will be retried once all overlapping requests
			 * have completed.
			 */
			bool discard = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  discard ? "local" : "remote");

			/* matching dec_unacked() happens in e_send_ack() */
			inc_unacked(mdev);
			peer_req->w.cb = discard ? e_send_discard_write :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &mdev->done_ee);
			wake_asender(mdev->tconn);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request will be discarded or
				 * retried. Requests that are discarded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(mdev, &req->i);
				if (err) {
					_conn_request_state(mdev->tconn,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(mdev, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(mdev, peer_req);
	return err;
}
1962
/* mirrored write
 *
 * Receive a P_DATA packet: read the payload into a peer request, enter
 * it into the current epoch, resolve write conflicts (dual-primary),
 * ack according to the wire protocol, and submit the local write.
 * Returns true on success, false to trigger re-connect.
 */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err;


	if (!get_ldev(mdev)) {
		/* no local disk: still keep sequence numbers in step, drain
		 * the payload off the socket and negatively ack it */
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return drbd_drain_block(mdev, data_size) && err == 0;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* join the write to the current epoch */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	if (mdev->tconn->net_conf->two_primaries) {
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&mdev->tconn->req_lock);
		err = handle_write_conflicts(mdev, peer_req);
		if (err) {
			spin_unlock_irq(&mdev->tconn->req_lock);
			/* -ENOENT: request was discarded/deferred, the queued
			 * ack work item owns it now — not an error */
			if (err == -ENOENT) {
				put_ldev(mdev);
				return true;
			}
			goto out_interrupted;
		}
	} else
		spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* undo the active_ee / interval-tree bookkeeping from above */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2071
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002072/* We may throttle resync, if the lower device seems to be busy,
2073 * and current sync rate is above c_min_rate.
2074 *
2075 * To decide whether or not the lower device is busy, we use a scheme similar
2076 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2077 * (more than 64 sectors) of activity we cannot account for with our own resync
2078 * activity, it obviously is "busy".
2079 *
2080 * The current sync rate used here uses only the most recent two step marks,
2081 * to have a short time average so we can react faster.
2082 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002083int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002084{
2085 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2086 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002087 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002088 int curr_events;
2089 int throttle = 0;
2090
2091 /* feature disabled? */
Lars Ellenbergf3990022011-03-23 14:31:09 +01002092 if (mdev->ldev->dc.c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002093 return 0;
2094
Philipp Reisnere3555d82010-11-07 15:56:29 +01002095 spin_lock_irq(&mdev->al_lock);
2096 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2097 if (tmp) {
2098 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2099 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2100 spin_unlock_irq(&mdev->al_lock);
2101 return 0;
2102 }
2103 /* Do not slow down if app IO is already waiting for this extent */
2104 }
2105 spin_unlock_irq(&mdev->al_lock);
2106
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002107 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2108 (int)part_stat_read(&disk->part0, sectors[1]) -
2109 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002110
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002111 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2112 unsigned long rs_left;
2113 int i;
2114
2115 mdev->rs_last_events = curr_events;
2116
2117 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2118 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002119 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2120
2121 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2122 rs_left = mdev->ov_left;
2123 else
2124 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002125
2126 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2127 if (!dt)
2128 dt++;
2129 db = mdev->rs_mark_left[i] - rs_left;
2130 dbdt = Bit2KB(db/dt);
2131
Lars Ellenbergf3990022011-03-23 14:31:09 +01002132 if (dbdt > mdev->ldev->dc.c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002133 throttle = 1;
2134 }
2135 return throttle;
2136}
2137
2138
/* Handle a block read request from the peer: plain application reads
 * (P_DATA_REQUEST), resync reads (P_RS_DATA_REQUEST), checksum based
 * resync (P_CSUM_RS_REQUEST), and online verify (P_OV_REQUEST/P_OV_REPLY).
 *
 * Validates the requested range, allocates a peer request, reads an
 * optional digest payload of @digest_size bytes from the socket, applies
 * resync throttling, and submits the local READ.
 *
 * Returns true on success; false tells the caller to tear down the
 * connection.  On the no-local-data path the payload is drained so the
 * data stream stays in sync. */
static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int digest_size)
{
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* Reject absurd sizes: non-positive, not 512-byte aligned, or
	 * larger than the maximum bio we support. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}
	/* Reject requests that run past the end of the device. */
	if (sector + (size>>9) > capacity) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}

	/* No usable local disk: negative-ack the request (packet type
	 * depends on what was asked for), then drain the payload. */
	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
		verb = 1;
		switch (cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(mdev);
			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
				cmdname(cmd));
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(mdev, digest_size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	/* Per packet type: pick completion callback and fault-injection
	 * class; some types need extra setup (digest, verify state). */
	switch (cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* The digest payload follows on the wire; read it into a
		 * digest_info hung off the peer request. */
		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = digest_size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
			goto out_free_e;

		if (cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &mdev->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(mdev);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First verify request of a run: initialize online-verify
		 * progress bookkeeping (protocol >= 90 only). */
		if (mdev->ov_start_sector == ~(sector_t)0 &&
		    mdev->tconn->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			mdev->ov_start_sector = sector;
			mdev->ov_position = sector;
			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
			mdev->rs_total = mdev->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				mdev->rs_mark_left[i] = mdev->ov_left;
				mdev->rs_mark_time[i] = now;
			}
			dev_info(DEV, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
		    cmdname(cmd));
		fault_type = DRBD_FAULT_MAX;
		goto out_free_e;
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(mdev, sector))
		goto out_free_e;

submit_for_resync:
	/* Account these sectors as our own resync activity, so the
	 * throttle heuristic does not count them as foreign IO. */
	atomic_add(size >> 9, &mdev->rs_sect_ev);

submit:
	inc_unacked(mdev);
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add_tail(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2325
/* After-split-brain recovery when ZERO nodes were primary at split time.
 *
 * Applies the configured after-sb-0pri policy and returns:
 *   -1  discard local data (we become sync target)
 *    1  discard remote data (we become sync source)
 * -100  no automatic decision possible.
 *
 * Note the deliberate case fallthroughs: younger/older-primary can fall
 * back to discard-least-changes when roles do not decide it. */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	/* lowest bitmap-UUID bit encodes "was primary at crash time" */
	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	/* number of changed blocks on each side */
	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* these policies only make sense with >= 1 primary */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: break by who would yield on concurrent writes */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* fall through (we got here via younger/older-primary) */
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2397
2398static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2399{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002400 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002401
Philipp Reisner89e58e72011-01-19 13:12:45 +01002402 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002403 case ASB_DISCARD_YOUNGER_PRI:
2404 case ASB_DISCARD_OLDER_PRI:
2405 case ASB_DISCARD_LEAST_CHG:
2406 case ASB_DISCARD_LOCAL:
2407 case ASB_DISCARD_REMOTE:
2408 dev_err(DEV, "Configuration error.\n");
2409 break;
2410 case ASB_DISCONNECT:
2411 break;
2412 case ASB_CONSENSUS:
2413 hg = drbd_asb_recover_0p(mdev);
2414 if (hg == -1 && mdev->state.role == R_SECONDARY)
2415 rv = hg;
2416 if (hg == 1 && mdev->state.role == R_PRIMARY)
2417 rv = hg;
2418 break;
2419 case ASB_VIOLENTLY:
2420 rv = drbd_asb_recover_0p(mdev);
2421 break;
2422 case ASB_DISCARD_SECONDARY:
2423 return mdev->state.role == R_PRIMARY ? 1 : -1;
2424 case ASB_CALL_HELPER:
2425 hg = drbd_asb_recover_0p(mdev);
2426 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002427 enum drbd_state_rv rv2;
2428
2429 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002430 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2431 * we might be here in C_WF_REPORT_PARAMS which is transient.
2432 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002433 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2434 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002435 drbd_khelper(mdev, "pri-lost-after-sb");
2436 } else {
2437 dev_warn(DEV, "Successfully gave up primary role.\n");
2438 rv = hg;
2439 }
2440 } else
2441 rv = hg;
2442 }
2443
2444 return rv;
2445}
2446
2447static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2448{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002449 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002450
Philipp Reisner89e58e72011-01-19 13:12:45 +01002451 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002452 case ASB_DISCARD_YOUNGER_PRI:
2453 case ASB_DISCARD_OLDER_PRI:
2454 case ASB_DISCARD_LEAST_CHG:
2455 case ASB_DISCARD_LOCAL:
2456 case ASB_DISCARD_REMOTE:
2457 case ASB_CONSENSUS:
2458 case ASB_DISCARD_SECONDARY:
2459 dev_err(DEV, "Configuration error.\n");
2460 break;
2461 case ASB_VIOLENTLY:
2462 rv = drbd_asb_recover_0p(mdev);
2463 break;
2464 case ASB_DISCONNECT:
2465 break;
2466 case ASB_CALL_HELPER:
2467 hg = drbd_asb_recover_0p(mdev);
2468 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002469 enum drbd_state_rv rv2;
2470
Philipp Reisnerb411b362009-09-25 16:07:19 -07002471 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2472 * we might be here in C_WF_REPORT_PARAMS which is transient.
2473 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002474 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2475 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002476 drbd_khelper(mdev, "pri-lost-after-sb");
2477 } else {
2478 dev_warn(DEV, "Successfully gave up primary role.\n");
2479 rv = hg;
2480 }
2481 } else
2482 rv = hg;
2483 }
2484
2485 return rv;
2486}
2487
2488static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2489 u64 bits, u64 flags)
2490{
2491 if (!uuid) {
2492 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2493 return;
2494 }
2495 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2496 text,
2497 (unsigned long long)uuid[UI_CURRENT],
2498 (unsigned long long)uuid[UI_BITMAP],
2499 (unsigned long long)uuid[UI_HISTORY_START],
2500 (unsigned long long)uuid[UI_HISTORY_END],
2501 (unsigned long long)bits,
2502 (unsigned long long)flags);
2503}
2504
/*
  100 after split brain try auto recover
    2 C_SYNC_SOURCE set BitMap
    1 C_SYNC_SOURCE use BitMap
    0 no Sync
   -1 C_SYNC_TARGET use BitMap
   -2 C_SYNC_TARGET set BitMap
 -100 after split brain, disconnect
-1000 unrelated data
-1091 requires proto 91
-1096 requires proto 96
 */
/* Compare our generation UUIDs against the peer's to decide who syncs
 * whom (see table above for return values).  *rule_nr reports which rule
 * decided, for logging.  May correct UUIDs in place when it detects a
 * missed "resync finished" event or a lost P_SYNC_UUID packet. */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* compare current UUIDs, ignoring the "was primary" low bit */
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	/* both just created: nothing to sync */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* only we are fresh: full sync from peer */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* only peer is fresh: full sync to peer */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* peer cleared its bitmap UUID but we still have ours:
		 * we were SyncSource and missed the "resync finished" event */
		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* mirror image of the case above: we were SyncTarget and the
		 * peer missed the "resync finished" event; fix the peer's
		 * in-memory UUID copy */
		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* both were primary: tie-break on who yields */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	/* our current UUID matches the peer's bitmap UUID:
	 * peer is in the middle of syncing us */
	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* our current UUID is in the peer's history: a P_SYNC_UUID packet
	 * may have been lost; detect and undo on the peer's copy */
	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* our current UUID somewhere in the peer's history: we are behind */
	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* our bitmap UUID matches the peer's current UUID:
	 * we are in the middle of syncing the peer */
	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* mirror of rule 51: the lost P_SYNC_UUID case, seen from our side */
	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	/* peer's current UUID somewhere in our history: peer is behind */
	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* matching non-zero bitmap UUIDs: split brain, try auto recovery */
	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* any common ancestor in both histories: split brain, disconnect */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* no relation between the data sets at all */
	return -1000;
}
2696
/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.
 */
/* Decide the connection state after the initial handshake: compare UUIDs,
 * possibly resolve split brain via the configured policies or helpers,
 * and set up the bitmap for a full sync if required.
 *
 * Returns C_CONNECTED, C_WF_BITMAP_S (we become sync source),
 * C_WF_BITMAP_T (we become sync target), or C_MASK to drop the
 * connection. */
static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	int hg, rule_nr;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;

	mydisk = mdev->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = mdev->new_state_tmp.disk;

	dev_info(DEV, "drbd_sync_handshake:\n");
	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

	/* hg > 0: we sync to peer; hg < 0: peer syncs to us;
	 * |hg| == 2 means full sync; see drbd_uuid_compare() table */
	hg = drbd_uuid_compare(mdev, &rule_nr);

	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		dev_alert(DEV, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* -1091/-1096: peer's protocol version is too old for the
	 * correction we would need to apply */
	if (hg < -1000) {
		dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* an inconsistent disk on exactly one side overrides the UUID
	 * verdict: the consistent side must be the source */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT))  {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		dev_info(DEV, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(mdev, "initial-split-brain");

	/* try the configured automatic split-brain recovery policies,
	 * chosen by how many nodes were primary at split time */
	if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
		int pcount = (mdev->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(mdev);
			break;
		case 1:
			hg = drbd_asb_recover_1p(mdev);
			break;
		case 2:
			hg = drbd_asb_recover_2p(mdev);
			break;
		}
		if (abs(hg) < 100) {
			dev_warn(DEV, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				dev_warn(DEV, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* still unresolved: honor a manually configured "discard my data" */
	if (hg == -100) {
		if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			dev_warn(DEV, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(mdev, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* becoming sync target while primary: apply rr-conflict policy */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
		switch (mdev->tconn->net_conf->rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(mdev, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* dry-run mode: report what would happen, then disconnect */
	if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
		if (hg == 0)
			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(mdev)) {
			dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(mdev));
		}
	}

	return rv;
}
2843
2844/* returns 1 if invalid */
2845static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2846{
2847 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2848 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2849 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2850 return 0;
2851
2852 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2853 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2854 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2855 return 1;
2856
2857 /* everything else is valid if they are equal on both sides. */
2858 if (peer == self)
2859 return 0;
2860
2861 /* everything es is invalid. */
2862 return 1;
2863}
2864
/* Handle a P_PROTOCOL packet: verify that the peer's wire protocol,
 * split-brain recovery policies and related connection options are
 * compatible with our local net_conf.  On any mismatch the connection
 * is torn down.  Returns true on success, false on error/disconnect. */
static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	/* Decode the peer's settings from network byte order. */
	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	/* Reset, then mirror the peer's dry-run request into our flags. */
	clear_bit(CONN_DRY_RUN, &tconn->flags);

	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &tconn->flags);

	/* Both sides must use the same wire protocol (A/B/C). */
	if (p_proto != tconn->net_conf->wire_protocol) {
		conn_err(tconn, "incompatible communication protocols\n");
		goto disconnect;
	}

	/* The after-split-brain policies must be compatible; cmp_after_sb()
	 * returns non-zero on an invalid combination. */
	if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
		conn_err(tconn, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
		conn_err(tconn, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
		conn_err(tconn, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* At most one side may volunteer to discard its data ("want_lose"). */
	if (p_want_lose && tconn->net_conf->want_lose) {
		conn_err(tconn, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != tconn->net_conf->two_primaries) {
		conn_err(tconn, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	/* Protocol version 87+ appends the integrity algorithm name as a
	 * trailing string; receive it and insist both sides match. */
	if (tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = tconn->net_conf->integrity_alg;

		if (drbd_recv(tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* force NUL termination, in case the peer sent garbage */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		conn_info(tconn, "data-integrity-alg: %s\n",
			  my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	return false;
}
2937
2938/* helper function
2939 * input: alg name, feature name
2940 * return: NULL (alg name was "")
2941 * ERR_PTR(error) if something goes wrong
2942 * or the crypto hash ptr, if it worked out ok. */
2943struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2944 const char *alg, const char *name)
2945{
2946 struct crypto_hash *tfm;
2947
2948 if (!alg[0])
2949 return NULL;
2950
2951 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2952 if (IS_ERR(tfm)) {
2953 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2954 alg, name, PTR_ERR(tfm));
2955 return tfm;
2956 }
2957 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2958 crypto_free_hash(tfm);
2959 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2960 return ERR_PTR(-EINVAL);
2961 }
2962 return tfm;
2963}
2964
/* Handle a P_SYNC_PARAM / P_SYNC_PARAM89 packet: take over the peer's
 * resync tuning parameters and (apv >= 88) the verify/csums digest
 * algorithm names.  The packet layout depends on the agreed protocol
 * version (apv).  Returns true on success, false on error/disconnect. */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* Largest packet we may legally receive, per protocol version:
	 * apv <= 87: fixed params only; apv 88: plus one shared-secret
	 * sized string; apv <= 94: _89 layout; apv >= 95: _95 layout. */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
			+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* Split the payload into the fixed header part (read below) and
	 * a variable data part (only non-empty for apv == 88). */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size = packet_size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	if (get_ldev(mdev)) {
		mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
		put_ldev(mdev);
	}

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg name as trailing data. */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
				    data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* The verify algorithm may only change while not connected;
		 * during an established connection a mismatch is fatal. */
		if (strcmp(mdev->tconn->net_conf->verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->tconn->net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* Same handling for the checksum-based-resync algorithm. */
		if (apv >= 89 && strcmp(mdev->tconn->net_conf->csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->tconn->net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		/* apv >= 95 adds the dynamic resync controller parameters;
		 * pre-allocate the plan fifo outside the spinlock below. */
		if (apv > 94 && get_ldev(mdev)) {
			mdev->ldev->dc.resync_rate = be32_to_cpu(p->rate);
			mdev->ldev->dc.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->ldev->dc.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->ldev->dc.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->ldev->dc.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->ldev->dc.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					put_ldev(mdev);
					goto disconnect;
				}
			}
			put_ldev(mdev);
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			/* ownership of verify_tfm passes to the tconn here */
			strcpy(mdev->tconn->net_conf->verify_alg, p->verify_alg);
			mdev->tconn->net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->tconn->verify_tfm);
			mdev->tconn->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			/* ownership of csums_tfm passes to the tconn here */
			strcpy(mdev->tconn->net_conf->csums_alg, p->csums_alg);
			mdev->tconn->net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->tconn->csums_tfm);
			mdev->tconn->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		if (fifo_size != mdev->rs_plan_s.size) {
			/* swap in the new (possibly NULL/empty) resync plan */
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	return false;
}
3121
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122/* warn if the arguments differ by more than 12.5% */
3123static void warn_if_differ_considerably(struct drbd_conf *mdev,
3124 const char *s, sector_t a, sector_t b)
3125{
3126 sector_t d;
3127 if (a == 0 || b == 0)
3128 return;
3129 d = (a > b) ? (a - b) : (b - a);
3130 if (d > (a>>3) || d > (b>>3))
3131 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3132 (unsigned long long)a, (unsigned long long)b);
3133}
3134
/* Handle a P_SIZES packet: negotiate the device size with the peer.
 * Stores the peer's disk/user sizes, refuses implicit shrinking during
 * connect, re-determines our own size, and triggers a resync or a size
 * report back to the peer where needed.  Returns true on success. */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);   /* peer's backing device size */
	p_usize = be64_to_cpu(p->u_size);  /* peer's user-requested size */

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember our own setting in case we have to roll back below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
			/* roll back the u_size change made above */
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	/* take over the peer's request size limit, then recompute ours */
	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	if (get_ldev(mdev)) {
		/* detect an online change of the backing device's size */
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				/* defer: resync once disk negotiation settles */
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3232
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003233static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3234 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003236 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003237 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003238 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3241
3242 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3243 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3244
3245 kfree(mdev->p_uuid);
3246 mdev->p_uuid = p_uuid;
3247
3248 if (mdev->state.conn < C_CONNECTED &&
3249 mdev->state.disk < D_INCONSISTENT &&
3250 mdev->state.role == R_PRIMARY &&
3251 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3252 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3253 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003254 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003255 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256 }
3257
3258 if (get_ldev(mdev)) {
3259 int skip_initial_sync =
3260 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003261 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3263 (p_uuid[UI_FLAGS] & 8);
3264 if (skip_initial_sync) {
3265 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3266 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003267 "clear_n_write from receive_uuids",
3268 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003269 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3270 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3271 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3272 CS_VERBOSE, NULL);
3273 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003274 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003275 }
3276 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003277 } else if (mdev->state.disk < D_INCONSISTENT &&
3278 mdev->state.role == R_PRIMARY) {
3279 /* I am a diskless primary, the peer just created a new current UUID
3280 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003281 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003282 }
3283
3284 /* Before we test for the disk state, we should wait until an eventually
3285 ongoing cluster wide state change is finished. That is important if
3286 we are primary and are detaching from our disk. We need to see the
3287 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003288 mutex_lock(mdev->state_mutex);
3289 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003290 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003291 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3292
3293 if (updated_uuids)
3294 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003295
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003296 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003297}
3298
3299/**
3300 * convert_state() - Converts the peer's view of the cluster state to our point of view
3301 * @ps: The state as seen by the peer.
3302 */
3303static union drbd_state convert_state(union drbd_state ps)
3304{
3305 union drbd_state ms;
3306
3307 static enum drbd_conns c_tab[] = {
3308 [C_CONNECTED] = C_CONNECTED,
3309
3310 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3311 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3312 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3313 [C_VERIFY_S] = C_VERIFY_T,
3314 [C_MASK] = C_MASK,
3315 };
3316
3317 ms.i = ps.i;
3318
3319 ms.conn = c_tab[ps.conn];
3320 ms.peer = ps.role;
3321 ms.role = ps.peer;
3322 ms.pdsk = ps.disk;
3323 ms.disk = ps.pdsk;
3324 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3325
3326 return ms;
3327}
3328
/* Handle a per-device state change request from the peer.  The request
 * is expressed in the peer's point of view and must be converted before
 * being applied; the outcome is reported back to the peer. */
static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If DISCARD_CONCURRENT is set on this side and a local state
	 * change currently holds state_mutex, refuse the peer's request
	 * with SS_CONCURRENT_ST_CHG instead of racing against our own. */
	if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* Translate the peer's view (role/peer, disk/pdsk swapped) to ours. */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(mdev, rv);

	drbd_md_sync(mdev);

	return true;
}
3355
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003356static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd,
3357 unsigned int data_size)
3358{
3359 struct p_req_state *p = &tconn->data.rbuf.req_state;
3360 union drbd_state mask, val;
3361 enum drbd_state_rv rv;
3362
3363 mask.i = be32_to_cpu(p->mask);
3364 val.i = be32_to_cpu(p->val);
3365
3366 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3367 mutex_is_locked(&tconn->cstate_mutex)) {
3368 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
3369 return true;
3370 }
3371
3372 mask = convert_state(mask);
3373 val = convert_state(val);
3374
3375 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
3376 conn_send_sr_reply(tconn, rv);
3377
3378 return true;
3379}
3380
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003381static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3382 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003383{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003384 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003385 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003386 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003387 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003388 int rv;
3389
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390 peer_state.i = be32_to_cpu(p->state);
3391
3392 real_peer_disk = peer_state.disk;
3393 if (peer_state.disk == D_NEGOTIATING) {
3394 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3395 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3396 }
3397
Philipp Reisner87eeee42011-01-19 14:16:30 +01003398 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003399 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003400 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003401 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003402
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003403 /* peer says his disk is uptodate, while we think it is inconsistent,
3404 * and this happens while we think we have a sync going on. */
3405 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3406 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3407 /* If we are (becoming) SyncSource, but peer is still in sync
3408 * preparation, ignore its uptodate-ness to avoid flapping, it
3409 * will change to inconsistent once the peer reaches active
3410 * syncing states.
3411 * It may have changed syncer-paused flags, however, so we
3412 * cannot ignore this completely. */
3413 if (peer_state.conn > C_CONNECTED &&
3414 peer_state.conn < C_SYNC_SOURCE)
3415 real_peer_disk = D_INCONSISTENT;
3416
3417 /* if peer_state changes to connected at the same time,
3418 * it explicitly notifies us that it finished resync.
3419 * Maybe we should finish it up, too? */
3420 else if (os.conn >= C_SYNC_SOURCE &&
3421 peer_state.conn == C_CONNECTED) {
3422 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3423 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003424 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003425 }
3426 }
3427
3428 /* peer says his disk is inconsistent, while we think it is uptodate,
3429 * and this happens while the peer still thinks we have a sync going on,
3430 * but we think we are already done with the sync.
3431 * We ignore this to avoid flapping pdsk.
3432 * This should not happen, if the peer is a recent version of drbd. */
3433 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3434 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3435 real_peer_disk = D_UP_TO_DATE;
3436
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003437 if (ns.conn == C_WF_REPORT_PARAMS)
3438 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003439
Philipp Reisner67531712010-10-27 12:21:30 +02003440 if (peer_state.conn == C_AHEAD)
3441 ns.conn = C_BEHIND;
3442
Philipp Reisnerb411b362009-09-25 16:07:19 -07003443 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3444 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3445 int cr; /* consider resync */
3446
3447 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003448 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003449 /* if we had an established connection
3450 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003451 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003452 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003453 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454 /* if we have both been inconsistent, and the peer has been
3455 * forced to be UpToDate with --overwrite-data */
3456 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3457 /* if we had been plain connected, and the admin requested to
3458 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003459 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460 (peer_state.conn >= C_STARTING_SYNC_S &&
3461 peer_state.conn <= C_WF_BITMAP_T));
3462
3463 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003464 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003465
3466 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003467 if (ns.conn == C_MASK) {
3468 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003469 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003470 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003471 } else if (peer_state.disk == D_NEGOTIATING) {
3472 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3473 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003474 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003475 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003476 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003477 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003478 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003479 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003480 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481 }
3482 }
3483 }
3484
Philipp Reisner87eeee42011-01-19 14:16:30 +01003485 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003486 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003487 goto retry;
3488 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003489 ns.peer = peer_state.role;
3490 ns.pdsk = real_peer_disk;
3491 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003492 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003493 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003494 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3495 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003496 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003497 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003498 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003499 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003500 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003501 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003502 drbd_uuid_new_current(mdev);
3503 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003504 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003505 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003506 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003507 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003509 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003510
3511 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003512 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003513 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514 }
3515
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003516 if (os.conn > C_WF_REPORT_PARAMS) {
3517 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003518 peer_state.disk != D_NEGOTIATING ) {
3519 /* we want resync, peer has not yet decided to sync... */
3520 /* Nowadays only used when forcing a node into primary role and
3521 setting its disk to UpToDate with that */
3522 drbd_send_uuids(mdev);
3523 drbd_send_state(mdev);
3524 }
3525 }
3526
Philipp Reisner89e58e72011-01-19 13:12:45 +01003527 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528
3529 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3530
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003531 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532}
3533
/* Handle a P_SYNC_UUID packet: adopt the sync UUID sent by the peer and
 * start resync as sync target.  Always returns true; if we cannot get a
 * reference on the local disk, the packet is logged and ignored. */
static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;

	/* Wait until we are in a state where acting on the packet makes
	 * sense, or until connection/disk state has degraded past the point
	 * where it could (conn < C_CONNECTED, disk < D_NEGOTIATING). */
	wait_event(mdev->misc_wait,
		   mdev->state.conn == C_WF_SYNC_UUID ||
		   mdev->state.conn == C_BEHIND ||
		   mdev->state.conn < C_CONNECTED ||
		   mdev->state.disk < D_NEGOTIATING);

	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		/* take over the peer's current UUID, clear our bitmap UUID */
		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);

		drbd_print_uuids(mdev, "updated sync uuid");
		drbd_start_resync(mdev, C_SYNC_TARGET);

		put_ldev(mdev);
	} else
		dev_err(DEV, "Ignoring SyncUUID packet!\n");

	return true;
}
3562
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003563/**
3564 * receive_bitmap_plain
3565 *
3566 * Return 0 when done, 1 when another iteration is needed, and a negative error
3567 * code upon failure.
3568 */
3569static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003570receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3571 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572{
3573 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3574 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003575 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576
Philipp Reisner02918be2010-08-20 14:35:10 +02003577 if (want != data_size) {
3578 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003579 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003580 }
3581 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003582 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003583 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003584 if (err != want) {
3585 if (err >= 0)
3586 err = -EIO;
3587 return err;
3588 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003589
3590 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3591
3592 c->word_offset += num_words;
3593 c->bit_offset = c->word_offset * BITS_PER_LONG;
3594 if (c->bit_offset > c->bm_bits)
3595 c->bit_offset = c->bm_bits;
3596
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003597 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598}
3599
/**
 * recv_bm_rle_bits
 *
 * Decode one P_COMPRESSED_BITMAP payload: a VLI-encoded sequence of
 * run lengths, alternating between clear and set runs (the packet header
 * tells us which kind the first run is).  Set runs are applied to the
 * bitmap; clear runs merely advance the position.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* sliding window of up to 64 not-yet-decoded bits */
	u64 rl;			/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* current absolute bit position */
	unsigned long e;			/* last bit of the current set-run */
	int toggle = DCBP_get_start(p);		/* whether the first run is "set" */
	int have;		/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* a run must not extend past the end of the bitmap */
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		/* the decoded code word must fit in what we had buffered */
		if (have < bits) {
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* consume the decoded bits and refill the window from the stream */
		look_ahead >>= bits;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	/* record how far we got; another packet continues from here */
	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}
3664
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003665/**
3666 * decode_bitmap_c
3667 *
3668 * Return 0 when done, 1 when another iteration is needed, and a negative error
3669 * code upon failure.
3670 */
3671static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672decode_bitmap_c(struct drbd_conf *mdev,
3673 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003674 struct bm_xfer_ctx *c,
3675 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676{
3677 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003678 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003679
3680 /* other variants had been implemented for evaluation,
3681 * but have been dropped as this one turned out to be "best"
3682 * during all our tests. */
3683
3684 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003685 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003686 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003687}
3688
3689void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3690 const char *direction, struct bm_xfer_ctx *c)
3691{
3692 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003693 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003694 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3695 + c->bm_words * sizeof(long);
3696 unsigned total = c->bytes[0] + c->bytes[1];
3697 unsigned r;
3698
3699 /* total can not be zero. but just in case: */
3700 if (total == 0)
3701 return;
3702
3703 /* don't report if not compressed */
3704 if (total >= plain)
3705 return;
3706
3707 /* total < plain. check for overflow, still */
3708 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3709 : (1000 * total / plain);
3710
3711 if (r > 1000)
3712 r = 1000;
3713
3714 r = 1000 - r;
3715 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3716 "total %u; compression: %u.%u%%\n",
3717 direction,
3718 c->bytes[1], c->packets[1],
3719 c->bytes[0], c->packets[0],
3720 total, r/10, r % 10);
3721}
3722
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;		/* one-page scratch buffer for incoming chunks */
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* The bitmap arrives as a sequence of P_BITMAP and/or
	 * P_COMPRESSED_BITMAP packets; keep receiving until the decoder
	 * reports completion (err == 0) or an error occurs. */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			/* payload must at least cover the fixed part of *p */
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* account plain vs. compressed traffic for the stats below */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;	/* err == 0: transfer complete */
		}
		/* more to come: read the next packet header ourselves */
		if (!drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* sync target: answer with our own bitmap, then proceed */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	/* sync source: kick off the resync now that both sides have the bitmap */
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);	/* free_page(0) is a no-op */
	return ok;
}
3825
Philipp Reisner2de876e2011-03-15 14:38:01 +01003826static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827{
3828 /* TODO zero copy sink :) */
3829 static char sink[128];
3830 int size, want, r;
3831
Philipp Reisner02918be2010-08-20 14:35:10 +02003832 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833 while (size > 0) {
3834 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003835 r = drbd_recv(tconn, sink, want);
3836 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003837 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003838 size -= r;
3839 }
3840 return size == 0;
3841}
3842
Philipp Reisner2de876e2011-03-15 14:38:01 +01003843static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3844 unsigned int data_size)
3845{
3846 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3847 cmd, data_size);
3848
3849 return _tconn_receive_skip(mdev->tconn, data_size);
3850}
3851
3852static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3853{
3854 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3855 cmd, data_size);
3856
3857 return _tconn_receive_skip(tconn, data_size);
3858}
3859
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003860static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3861 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003862{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003863 /* Make sure we've acked all the TCP data associated
3864 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003865 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003866
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003867 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003868}
3869
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003870static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3871 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003872{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003873 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003874
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003875 switch (mdev->state.conn) {
3876 case C_WF_SYNC_UUID:
3877 case C_WF_BITMAP_T:
3878 case C_BEHIND:
3879 break;
3880 default:
3881 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3882 drbd_conn_str(mdev->state.conn));
3883 }
3884
Philipp Reisner73a01a12010-10-27 14:33:00 +02003885 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3886
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003887 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003888}
3889
/* One entry of the receiver's packet dispatch table (see drbd_cmd_handler). */
struct data_cmd {
	int expect_payload;	/* packet may carry payload beyond pkt_size;
				 * enforced in drbdd() */
	size_t pkt_size;	/* fixed on-the-wire size, including p_header */
	enum mdev_or_conn fa_type; /* first argument's type */
	union {
		/* handler taking a device (fa_type == MDEV) */
		int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd,
				unsigned int to_receive);
		/* handler taking a connection (fa_type == CONN) */
		int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd,
				unsigned int to_receive);
	};
};
3901
/* Dispatch table for incoming packets, indexed by packet type.
 * Entries with CONN use the .conn_fn member and receive the connection as
 * first argument; MDEV entries receive the device.  drbdd() looks up the
 * handler, validates pkt_size/expect_payload, and calls it. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), MDEV, { receive_Data } },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } ,
	[P_BITMAP]	    = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } },
	[P_STATE]	    = { 0, sizeof(struct p_state), MDEV, { receive_state } },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
	[P_DELAY_PROBE]	    = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } },
	[P_OUT_OF_SYNC]	    = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } },
};
3927
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually a callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it may not rely on that,
   since there is also p_header95!
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003934
/* Main receive loop of the receiver thread: read packet headers, validate
 * them against the drbd_cmd_handler table, pull in the sub-header, and
 * dispatch to the per-packet handler.  On any protocol violation or
 * handler failure, force the connection into C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_tconn *tconn)
{
	struct p_header *header = &tconn->data.rbuf.header;
	struct packet_info pi;
	size_t shs; /* sub header size */
	int rv;

	while (get_t_state(&tconn->receiver) == RUNNING) {
		drbd_thread_current_set_cpu(&tconn->receiver);
		if (!drbd_recv_header(tconn, &pi))
			goto err_out;

		/* packet type must be in range and have a handler */
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
			     !drbd_cmd_handler[pi.cmd].mdev_fn)) {
			conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
			goto err_out;
		}

		/* bytes of fixed sub-header beyond the generic p_header */
		shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
		/* extra payload is only allowed where the table says so */
		if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
			conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			rv = drbd_recv(tconn, &header->payload, shs);
			if (unlikely(rv != shs)) {
				if (!signal_pending(current))
					conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
				goto err_out;
			}
		}

		/* dispatch: connection-level handlers take tconn directly;
		 * device-level handlers need the volume resolved first, and
		 * packets for unknown volumes are drained and skipped */
		if (drbd_cmd_handler[pi.cmd].fa_type == CONN) {
			rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs);
		} else {
			struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
			rv = mdev ?
				drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) :
				tconn_receive_skip(tconn, pi.cmd, pi.size - shs);
		}

		if (unlikely(!rv)) {
			conn_err(tconn, "error receiving %s, l: %d!\n",
			    cmdname(pi.cmd), pi.size);
			goto err_out;
		}
	}

	/* error path lives inside if (0) so normal loop exit skips it */
	if (0) {
	err_out:
		conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	}
}
3989
Philipp Reisner0e29d162011-02-18 14:23:11 +01003990void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003991{
3992 struct drbd_wq_barrier barr;
3993
3994 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01003995 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003996 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01003997 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003998 wait_for_completion(&barr.done);
3999}
4000
/* Tear down a connection: stop the asender, close the socket, run the
 * per-volume cleanup, and move the connection state towards C_UNCONNECTED
 * (or all the way to C_STANDALONE when the admin asked to disconnect). */
static void drbd_disconnect(struct drbd_tconn *tconn)
{
	enum drbd_conns oc;	/* connection state sampled under req_lock */
	int rv = SS_UNKNOWN_ERROR;

	/* nothing to tear down */
	if (tconn->cstate == C_STANDALONE)
		return;

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* per-volume cleanup for every device on this connection */
	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);

	conn_info(tconn, "Connection closed\n");

	spin_lock_irq(&tconn->req_lock);
	oc = tconn->cstate;
	if (oc >= C_UNCONNECTED)
		rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&tconn->req_lock);

	if (oc == C_DISCONNECTING) {
		/* wait until nobody holds a reference on the net config
		 * anymore before freeing it */
		wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);

		crypto_free_hash(tconn->cram_hmac_tfm);
		tconn->cram_hmac_tfm = NULL;

		kfree(tconn->net_conf);
		tconn->net_conf = NULL;
		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
	}
}
4035
4036static int drbd_disconnected(int vnr, void *p, void *data)
4037{
4038 struct drbd_conf *mdev = (struct drbd_conf *)p;
4039 enum drbd_fencing_p fp;
4040 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041
Philipp Reisner85719572010-07-21 10:20:17 +02004042 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004043 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4045 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4046 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004047 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004048
4049 /* We do not have data structures that would allow us to
4050 * get the rs_pending_cnt down to 0 again.
4051 * * On C_SYNC_TARGET we do not have any data structures describing
4052 * the pending RSDataRequest's we have sent.
4053 * * On C_SYNC_SOURCE there is no data structure that tracks
4054 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4055 * And no, it is not the sum of the reference counts in the
4056 * resync_LRU. The resync_LRU tracks the whole operation including
4057 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4058 * on the fly. */
4059 drbd_rs_cancel_all(mdev);
4060 mdev->rs_total = 0;
4061 mdev->rs_failed = 0;
4062 atomic_set(&mdev->rs_pending_cnt, 0);
4063 wake_up(&mdev->misc_wait);
4064
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004065 del_timer(&mdev->request_timer);
4066
Philipp Reisnerb411b362009-09-25 16:07:19 -07004067 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004068 resync_timer_fn((unsigned long)mdev);
4069
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4071 * w_make_resync_request etc. which may still be on the worker queue
4072 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004073 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074
4075 /* This also does reclaim_net_ee(). If we do this too early, we might
4076 * miss some resync ee and pages.*/
4077 drbd_process_done_ee(mdev);
4078
4079 kfree(mdev->p_uuid);
4080 mdev->p_uuid = NULL;
4081
Philipp Reisnerfb22c402010-09-08 23:20:21 +02004082 if (!is_susp(mdev->state))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004083 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004084
Philipp Reisnerb411b362009-09-25 16:07:19 -07004085 drbd_md_sync(mdev);
4086
4087 fp = FP_DONT_CARE;
4088 if (get_ldev(mdev)) {
4089 fp = mdev->ldev->dc.fencing;
4090 put_ldev(mdev);
4091 }
4092
Philipp Reisner87f7be42010-06-11 13:56:33 +02004093 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
4094 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004095
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004096 /* serialize with bitmap writeout triggered by the state change,
4097 * if any. */
4098 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4099
Philipp Reisnerb411b362009-09-25 16:07:19 -07004100 /* tcp_close and release of sendpage pages can be deferred. I don't
4101 * want to use SO_LINGER, because apparently it can be deferred for
4102 * more than 20 seconds (longest time I checked).
4103 *
4104 * Actually we don't care for exactly when the network stack does its
4105 * put_page(), but release our reference on these pages right here.
4106 */
4107 i = drbd_release_ee(mdev, &mdev->net_ee);
4108 if (i)
4109 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004110 i = atomic_read(&mdev->pp_in_use_by_net);
4111 if (i)
4112 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113 i = atomic_read(&mdev->pp_in_use);
4114 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004115 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004116
4117 D_ASSERT(list_empty(&mdev->read_ee));
4118 D_ASSERT(list_empty(&mdev->active_ee));
4119 D_ASSERT(list_empty(&mdev->sync_ee));
4120 D_ASSERT(list_empty(&mdev->done_ee));
4121
4122 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4123 atomic_set(&mdev->current_epoch->epoch_size, 0);
4124 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004125
4126 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004127}
4128
4129/*
4130 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4131 * we can agree on is stored in agreed_pro_version.
4132 *
4133 * feature flags and the reserved array should be enough room for future
4134 * enhancements of the handshake protocol, and possible plugins...
4135 *
4136 * for now, they are expected to be zero, but ignored.
4137 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004138static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004139{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004140 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004141 struct p_handshake *p = &tconn->data.sbuf.handshake;
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004142 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004143
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004144 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4145 conn_err(tconn, "interrupted during initial handshake\n");
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004146 return -EINTR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004147 }
4148
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004149 if (tconn->data.socket == NULL) {
4150 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004151 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004152 }
4153
4154 memset(p, 0, sizeof(*p));
4155 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4156 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004157 err = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
Andreas Gruenbacherecf23632011-03-15 23:48:25 +01004158 &p->head, sizeof(*p), 0);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004159 mutex_unlock(&tconn->data.mutex);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004160 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004161}
4162
4163/*
4164 * return values:
4165 * 1 yes, we have a valid connection
4166 * 0 oops, did not work out, please try again
4167 * -1 peer talks different language,
4168 * no point in trying again, please go standalone.
4169 */
static int drbd_do_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == tconn->receiver ... */
	struct p_handshake *p = &tconn->data.rbuf.handshake;
	/* payload bytes following the (8.0-style) packet header */
	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
	struct packet_info pi;
	int err, rv;

	/* Send our own version range first.  A send failure is a retriable
	 * network problem, not an incompatibility -> return 0 ("try again"). */
	err = drbd_send_handshake(tconn);
	if (err)
		return 0;

	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		return 0;

	/* Anything other than a handshake packet at this point means the
	 * peer speaks a different language; give up permanently (-1). */
	if (pi.cmd != P_HAND_SHAKE) {
		conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		conn_err(tconn, "expected HandShake length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	/* read the payload (version range and reserved fields) in place */
	rv = drbd_recv(tconn, &p->head.payload, expect);

	if (rv != expect) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
		return 0;
	}

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* a peer reporting max == 0 is taken to support exactly protocol_min */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* no overlap between [PRO_VERSION_MIN, PRO_VERSION_MAX] and the
	 * peer's advertised range -> incompatible */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version both sides support */
	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	conn_info(tconn, "Handshake successful: "
	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);

	return 1;

 incompat:
	conn_err(tconn, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4229
4230#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac-alg based
 * peer authentication cannot work, so always fail permanently (-1).
 *
 * Fix: this function receives a tconn, not an mdev, so it must use
 * conn_err() like its real counterpart below; the original dev_err(DEV, ...)
 * referenced an mdev that is not in scope here and could not compile when
 * this branch was selected.  Also corrected the "was build" typo in the
 * user-visible message.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4237#else
4238#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004239
4240/* Return value:
4241 1 - auth succeeded,
4242 0 - failed, try again (network error),
4243 -1 - auth failed, don't try again.
4244*/
4245
Philipp Reisner13e60372011-02-08 09:54:40 +01004246static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247{
4248 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4249 struct scatterlist sg;
4250 char *response = NULL;
4251 char *right_response = NULL;
4252 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004253 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 unsigned int resp_size;
4255 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004256 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257 int rv;
4258
Philipp Reisner13e60372011-02-08 09:54:40 +01004259 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260 desc.flags = 0;
4261
Philipp Reisner13e60372011-02-08 09:54:40 +01004262 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4263 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004265 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004266 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267 goto fail;
4268 }
4269
4270 get_random_bytes(my_challenge, CHALLENGE_LEN);
4271
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004272 rv = !conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004273 if (!rv)
4274 goto fail;
4275
Philipp Reisner13e60372011-02-08 09:54:40 +01004276 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004277 if (!rv)
4278 goto fail;
4279
Philipp Reisner77351055b2011-02-07 17:24:26 +01004280 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004281 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004282 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283 rv = 0;
4284 goto fail;
4285 }
4286
Philipp Reisner77351055b2011-02-07 17:24:26 +01004287 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004288 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004289 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004290 goto fail;
4291 }
4292
Philipp Reisner77351055b2011-02-07 17:24:26 +01004293 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004294 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004295 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004296 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 goto fail;
4298 }
4299
Philipp Reisner13e60372011-02-08 09:54:40 +01004300 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301
Philipp Reisner77351055b2011-02-07 17:24:26 +01004302 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004303 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004304 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305 rv = 0;
4306 goto fail;
4307 }
4308
Philipp Reisner13e60372011-02-08 09:54:40 +01004309 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310 response = kmalloc(resp_size, GFP_NOIO);
4311 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004312 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004313 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314 goto fail;
4315 }
4316
4317 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004318 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319
4320 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4321 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004322 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004323 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004324 goto fail;
4325 }
4326
Andreas Gruenbacherce9879c2011-03-15 23:34:29 +01004327 rv = !conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004328 if (!rv)
4329 goto fail;
4330
Philipp Reisner13e60372011-02-08 09:54:40 +01004331 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332 if (!rv)
4333 goto fail;
4334
Philipp Reisner77351055b2011-02-07 17:24:26 +01004335 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004336 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004337 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338 rv = 0;
4339 goto fail;
4340 }
4341
Philipp Reisner77351055b2011-02-07 17:24:26 +01004342 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004343 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344 rv = 0;
4345 goto fail;
4346 }
4347
Philipp Reisner13e60372011-02-08 09:54:40 +01004348 rv = drbd_recv(tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004349
4350 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004351 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004352 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004353 rv = 0;
4354 goto fail;
4355 }
4356
4357 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004358 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004359 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004360 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361 goto fail;
4362 }
4363
4364 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4365
4366 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4367 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004368 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004369 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004370 goto fail;
4371 }
4372
4373 rv = !memcmp(response, right_response, resp_size);
4374
4375 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004376 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4377 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004378 else
4379 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380
4381 fail:
4382 kfree(peers_ch);
4383 kfree(response);
4384 kfree(right_response);
4385
4386 return rv;
4387}
4388#endif
4389
4390int drbdd_init(struct drbd_thread *thi)
4391{
Philipp Reisner392c8802011-02-09 10:33:31 +01004392 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004393 int h;
4394
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004395 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004396
4397 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004398 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004399 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004400 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004401 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402 }
4403 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004404 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004405 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 }
4407 } while (h == 0);
4408
4409 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004410 if (get_net_conf(tconn)) {
4411 drbdd(tconn);
4412 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004413 }
4414 }
4415
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004416 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004417
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004418 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419 return 0;
4420}
4421
4422/* ********* acknowledge sender ******** */
4423
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004424static int got_conn_RqSReply(struct drbd_tconn *tconn, enum drbd_packet cmd)
4425{
4426 struct p_req_state_reply *p = &tconn->meta.rbuf.req_state_reply;
4427 int retcode = be32_to_cpu(p->retcode);
4428
4429 if (retcode >= SS_SUCCESS) {
4430 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4431 } else {
4432 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4433 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4434 drbd_set_st_err_str(retcode), retcode);
4435 }
4436 wake_up(&tconn->ping_wait);
4437
4438 return true;
4439}
4440
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004441static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004442{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004443 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004444 int retcode = be32_to_cpu(p->retcode);
4445
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004446 if (retcode >= SS_SUCCESS) {
4447 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4448 } else {
4449 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4450 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4451 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004453 wake_up(&mdev->state_wait);
4454
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004455 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004456}
4457
/* P_PING from the peer: answer immediately with a P_PING_ACK. */
static int got_Ping(struct drbd_tconn *tconn, enum drbd_packet cmd)
{
	return drbd_send_ping_ack(tconn);

}
4463
/* P_PING_ACK from the peer: the link is alive, so restore the normal
 * receive timeout on the meta socket and wake the first waiter. */
static int got_PingAck(struct drbd_tconn *tconn, enum drbd_packet cmd)
{
	/* restore idle timeout */
	tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
	/* only the transition of GOT_PING_ACK from clear to set wakes waiters */
	if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
		wake_up(&tconn->ping_wait);

	return true;
}
4473
/* Peer reports a block as already in sync (checksum-based resync): mark it
 * in sync locally and account it in the resync bookkeeping. */
static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	/* this packet presumably only occurs with protocols >= 89 -- the
	 * assert documents that expectation */
	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* only touch on-disk state while we hold a local-disk reference */
	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, sector);
		drbd_set_in_sync(mdev, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(mdev);
	}
	dec_rs_pending(mdev);
	/* rs_sect_in is counted in 512-byte sectors */
	atomic_add(blksize >> 9, &mdev->rs_sect_in);

	return true;
}
4496
/* Look up the request identified by (id, sector) in the given tree and
 * apply the state-machine event 'what' to it, all under req_lock.
 * Returns false if the request was not found (callers decide whether that
 * is acceptable via missing_ok), true otherwise.  If the state transition
 * completed the master bio, it is completed outside the lock. */
static int
validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&mdev->tconn->req_lock);
		return false;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* complete_master_bio must not be called with req_lock held */
	if (m.bio)
		complete_master_bio(mdev, &m);
	return true;
}
4518
/* Positive acknowledgement for a write (several packet flavors): translate
 * the packet type into the matching request state-machine event and apply
 * it to the corresponding write request. */
static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* acks for resync traffic carry ID_SYNCER instead of a request id */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(mdev, sector, blksize);
		dec_rs_pending(mdev);
		return true;
	}
	/* each ack flavor is only valid under a specific wire protocol;
	 * the D_ASSERTs document that expectation */
	switch (cmd) {
	case P_RS_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
		what = RECV_ACKED_BY_PEER;
		break;
	case P_DISCARD_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = DISCARD_WRITE;
		break;
	case P_RETRY_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = POSTPONE_WRITE;
		break;
	default:
		D_ASSERT(0);
		return false;
	}

	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->write_requests, __func__,
					     what, false);
}
4563
/* Negative acknowledgement for a write: the peer failed to apply it.
 * Fail the matching request; under protocols A/B the request may already
 * be gone, in which case just mark the range out of sync. */
static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	/* in protocols A and B the request may legitimately no longer exist */
	bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
			  mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
	bool found;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* resync traffic is identified by ID_SYNCER, not a request id */
	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(mdev);
		drbd_rs_failed_io(mdev, sector, size);
		return true;
	}

	found = validate_req_change_req_state(mdev, p->block_id, sector,
					      &mdev->write_requests, __func__,
					      NEG_ACKED, missing_ok);
	if (!found) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		if (!missing_ok)
			return false;
		drbd_set_out_of_sync(mdev, sector, size);
	}
	return true;
}
4596
/* Negative reply to a data (read) request: the peer could not serve the
 * read, so fail the original request. */
static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
	    (unsigned long long)sector, be32_to_cpu(p->blksize));

	/* failed reads live in the read_requests tree; missing is not ok */
	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->read_requests, __func__,
					     NEG_ACKED, false);
}
4611
/* Negative reply to a resync data request (P_NEG_RS_DREPLY), or a resync
 * request cancellation (P_RS_CANCEL): drop the pending-resync accounting
 * and, if we still have a local disk, finish the resync extent. */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through: both cases are done after accounting */
		case P_RS_CANCEL:
			break;
		default:
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4642
/* Barrier acknowledged by the peer: release the corresponding transfer-log
 * epoch.  If we are in Ahead mode and no application requests are in
 * flight, schedule the transition back to SyncSource. */
static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;

	tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));

	/* the test_and_set_bit ensures the resync timer is armed only once
	 * per Ahead episode */
	if (mdev->state.conn == C_AHEAD &&
	    atomic_read(&mdev->ap_in_flight) == 0 &&
	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
		mdev->start_resync_timer.expires = jiffies + HZ;
		add_timer(&mdev->start_resync_timer);
	}

	return true;
}
4658
/* Result of one online-verify request: record whether the block matched,
 * update progress, and when the last block is in, queue the finish work
 * (or finish synchronously if the allocation fails). */
static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	struct drbd_work *w;
	sector_t sector;
	int size;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_oos_found(mdev, sector, size);
	else
		ov_oos_print(mdev);

	/* everything below needs a local-disk reference */
	if (!get_ldev(mdev))
		return true;

	drbd_rs_complete_io(mdev, sector);
	dec_rs_pending(mdev);

	--mdev->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((mdev->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(mdev, mdev->ov_left);

	if (mdev->ov_left == 0) {
		w = kmalloc(sizeof(*w), GFP_NOIO);
		if (w) {
			w->cb = w_ov_finished;
			w->mdev = mdev;
			drbd_queue_work_front(&mdev->tconn->data.work, w);
		} else {
			/* out of memory: finish inline instead of via worker */
			dev_err(DEV, "kmalloc(w) failed.");
			ov_oos_print(mdev);
			drbd_resync_finished(mdev);
		}
	}
	put_ldev(mdev);
	return true;
}
4703
/* Handler for packets whose payload is deliberately ignored (e.g. the
 * P_DELAY_PROBE compatibility packet): consume it and report success so
 * the asender keeps the connection alive. */
static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return true;
}
4708
/**
 * tconn_process_done_ee() - Drain the done_ee lists of all volumes of a connection
 * @tconn:	Connection whose volumes are processed.
 *
 * Repeatedly calls drbd_process_done_ee() for every volume until no volume
 * has pending entries left on its done_ee list.  SIGNAL_ASENDER is cleared
 * (and pending signals flushed) before processing so that a signal sent to
 * wake the asender during processing is not lost, and set again before
 * re-checking the lists under req_lock.
 *
 * Returns 0 when all lists are empty, 1 if processing any volume failed
 * (caller treats this as a reason to reconnect).
 */
static int tconn_process_done_ee(struct drbd_tconn *tconn)
{
	struct drbd_conf *mdev;
	int i, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &tconn->flags);
		flush_signals(current);
		idr_for_each_entry(&tconn->volumes, mdev, i) {
			if (drbd_process_done_ee(mdev))
				return 1; /* error */
		}
		set_bit(SIGNAL_ASENDER, &tconn->flags);

		/* Re-check under the lock: entries may have been added while
		 * we were processing without holding req_lock. */
		spin_lock_irq(&tconn->req_lock);
		idr_for_each_entry(&tconn->volumes, mdev, i) {
			not_empty = !list_empty(&mdev->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&tconn->req_lock);
	} while (not_empty);

	return 0;
}
4734
/* One entry per acknowledgement packet type handled by the asender.
 * fa_type selects which union member is valid: MDEV handlers operate on a
 * single volume, CONN handlers on the whole connection. */
struct asender_cmd {
	size_t pkt_size;	/* expected on-wire size incl. header */
	enum mdev_or_conn fa_type; /* first argument's type */
	union {
		int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd);
		int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd);
	};
};
4743
/* Dispatch table for packets arriving on the meta-data socket, indexed by
 * packet command number.  Command numbers without an entry are left
 * zero-initialized (pkt_size == 0, function pointer NULL) by the designated
 * initializers, so callers must reject such holes before dispatching. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { sizeof(struct p_header), CONN, { .conn_fn = got_Ping } },
	[P_PING_ACK]	    = { sizeof(struct p_header), CONN, { .conn_fn = got_PingAck } },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_DISCARD_WRITE]   = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), MDEV, { got_NegAck } },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), MDEV, { got_OVResult } },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), MDEV, { got_skip } },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), CONN, {.conn_fn = got_conn_RqSReply}},
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
};
4763
Philipp Reisnerb411b362009-09-25 16:07:19 -07004764int drbd_asender(struct drbd_thread *thi)
4765{
Philipp Reisner392c8802011-02-09 10:33:31 +01004766 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004767 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004768 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004769 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004770 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004771 void *buf = h;
4772 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004773 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004774 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004775
Philipp Reisnerb411b362009-09-25 16:07:19 -07004776 current->policy = SCHED_RR; /* Make this a realtime task! */
4777 current->rt_priority = 2; /* more important than all other tasks */
4778
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004779 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004780 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004781 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004782 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004783 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004784 goto reconnect;
4785 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004786 tconn->meta.socket->sk->sk_rcvtimeo =
4787 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004788 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004789 }
4790
Philipp Reisner32862ec2011-02-08 16:41:01 +01004791 /* TODO: conditionally cork; it may hurt latency if we cork without
4792 much to send */
4793 if (!tconn->net_conf->no_cork)
4794 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004795 if (tconn_process_done_ee(tconn)) {
4796 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004797 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004798 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004799 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004800 if (!tconn->net_conf->no_cork)
4801 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004802
4803 /* short circuit, recv_msg would return EINTR anyways. */
4804 if (signal_pending(current))
4805 continue;
4806
Philipp Reisner32862ec2011-02-08 16:41:01 +01004807 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4808 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004809
4810 flush_signals(current);
4811
4812 /* Note:
4813 * -EINTR (on meta) we got a signal
4814 * -EAGAIN (on meta) rcvtimeo expired
4815 * -ECONNRESET other side closed the connection
4816 * -ERESTARTSYS (on data) we got a signal
4817 * rv < 0 other than above: unexpected error!
4818 * rv == expected: full header or command
4819 * rv < expected: "woken" by signal during receive
4820 * rv == 0 : "connection shut down by peer"
4821 */
4822 if (likely(rv > 0)) {
4823 received += rv;
4824 buf += rv;
4825 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004826 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004827 goto reconnect;
4828 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004829 /* If the data socket received something meanwhile,
4830 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004831 if (time_after(tconn->last_received,
4832 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004833 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004834 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004835 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004836 goto reconnect;
4837 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004838 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004839 continue;
4840 } else if (rv == -EINTR) {
4841 continue;
4842 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004843 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004844 goto reconnect;
4845 }
4846
4847 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004848 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004849 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004850 cmd = &asender_tbl[pi.cmd];
4851 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004852 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004853 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004854 goto disconnect;
4855 }
4856 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004857 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004858 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004859 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004860 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004861 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862 }
4863 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004864 bool rv;
4865
4866 if (cmd->fa_type == CONN) {
4867 rv = cmd->conn_fn(tconn, pi.cmd);
4868 } else {
4869 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
4870 rv = cmd->mdev_fn(mdev, pi.cmd);
4871 }
4872
4873 if (!rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004874 goto reconnect;
4875
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004876 tconn->last_received = jiffies;
4877
Lars Ellenbergf36af182011-03-09 22:44:55 +01004878 /* the idle_timeout (ping-int)
4879 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004880 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004881 ping_timeout_active = 0;
4882
Philipp Reisnerb411b362009-09-25 16:07:19 -07004883 buf = h;
4884 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004885 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004886 cmd = NULL;
4887 }
4888 }
4889
4890 if (0) {
4891reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004892 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004893 }
4894 if (0) {
4895disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004896 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004897 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004898 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004899
Philipp Reisner32862ec2011-02-08 16:41:01 +01004900 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004901
4902 return 0;
4903}