blob: ba350bd562c698b610918d8996f9851961abd1ae [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
Philipp Reisner77351055b2011-02-07 17:24:26 +010051struct packet_info {
52 enum drbd_packet cmd;
53 int size;
54 int vnr;
55};
56
/* Result codes returned by drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
/* Selector between a single device (MDEV) and a whole connection (CONN). */
enum mdev_or_conn {
	MDEV,
	CONN,
};
67
/* Forward declarations of file-local (static) functions. */
static int drbd_do_handshake(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070074
Philipp Reisnerb411b362009-09-25 16:07:19 -070075
/* Allocation flags used when opportunistically trying to get a page:
 * highmem is allowed, and allocation failure must not trigger a warning. */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
77
/*
 * Some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */
82
83/* If at least n pages are linked at head, get n pages off.
84 * Otherwise, don't modify head, and return NULL.
85 * Locking is the responsibility of the caller.
86 */
87static struct page *page_chain_del(struct page **head, int n)
88{
89 struct page *page;
90 struct page *tmp;
91
92 BUG_ON(!n);
93 BUG_ON(!head);
94
95 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020096
97 if (!page)
98 return NULL;
99
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200100 while (page) {
101 tmp = page_chain_next(page);
102 if (--n == 0)
103 break; /* found sufficient pages */
104 if (tmp == NULL)
105 /* insufficient pages, don't use any of them. */
106 return NULL;
107 page = tmp;
108 }
109
110 /* add end of list marker for the returned list */
111 set_page_private(page, 0);
112 /* actual return value, and adjustment of head */
113 page = *head;
114 *head = tmp;
115 return page;
116}
117
118/* may be used outside of locks to find the tail of a (usually short)
119 * "private" page chain, before adding it back to a global chain head
120 * with page_chain_add() under a spinlock. */
121static struct page *page_chain_tail(struct page *page, int *len)
122{
123 struct page *tmp;
124 int i = 1;
125 while ((tmp = page_chain_next(page)))
126 ++i, page = tmp;
127 if (len)
128 *len = i;
129 return page;
130}
131
132static int page_chain_free(struct page *page)
133{
134 struct page *tmp;
135 int i = 0;
136 page_chain_for_each_safe(page, tmp) {
137 put_page(page);
138 ++i;
139 }
140 return i;
141}
142
143static void page_chain_add(struct page **head,
144 struct page *chain_first, struct page *chain_last)
145{
146#if 1
147 struct page *tmp;
148 tmp = page_chain_tail(chain_first, NULL);
149 BUG_ON(tmp != chain_last);
150#endif
151
152 /* add chain to head */
153 set_page_private(chain_last, (unsigned long)*head);
154 *head = chain_first;
155}
156
157static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700158{
159 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200160 struct page *tmp = NULL;
161 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700162
163 /* Yes, testing drbd_pp_vacant outside the lock is racy.
164 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200165 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700166 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200167 page = page_chain_del(&drbd_pp_pool, number);
168 if (page)
169 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700170 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200171 if (page)
172 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700173 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200174
Philipp Reisnerb411b362009-09-25 16:07:19 -0700175 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
176 * "criss-cross" setup, that might cause write-out on some other DRBD,
177 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200178 for (i = 0; i < number; i++) {
179 tmp = alloc_page(GFP_TRY);
180 if (!tmp)
181 break;
182 set_page_private(tmp, (unsigned long)page);
183 page = tmp;
184 }
185
186 if (i == number)
187 return page;
188
189 /* Not enough pages immediately available this time.
190 * No need to jump around here, drbd_pp_alloc will retry this
191 * function "soon". */
192 if (page) {
193 tmp = page_chain_tail(page, NULL);
194 spin_lock(&drbd_pp_lock);
195 page_chain_add(&drbd_pp_pool, page, tmp);
196 drbd_pp_vacant += i;
197 spin_unlock(&drbd_pp_lock);
198 }
199 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200}
201
Philipp Reisnerb411b362009-09-25 16:07:19 -0700202static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
203{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100204 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205 struct list_head *le, *tle;
206
207 /* The EEs are always appended to the end of the list. Since
208 they are sent in order over the wire, they have to finish
209 in order. As soon as we see the first not finished we can
210 stop to examine the list... */
211
212 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100213 peer_req = list_entry(le, struct drbd_peer_request, w.list);
214 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 break;
216 list_move(le, to_be_freed);
217 }
218}
219
220static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
221{
222 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100223 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700224
Philipp Reisner87eeee42011-01-19 14:16:30 +0100225 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100227 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100229 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
230 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700231}
232
233/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700235 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200236 * @number: number of pages requested
237 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700238 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Tries to allocate number pages, first from our own page pool, then from
240 * the kernel, unless this allocation would exceed the max_buffers setting.
241 * Possibly retry until DRBD frees sufficient pages somewhere else.
242 *
243 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700244 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200245static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700246{
247 struct page *page = NULL;
248 DEFINE_WAIT(wait);
249
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250 /* Yes, we may run up to @number over max_buffers. If we
251 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100252 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200253 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700254
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200255 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700256 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
257
258 drbd_kick_lo_and_reclaim_net(mdev);
259
Philipp Reisner89e58e72011-01-19 13:12:45 +0100260 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200261 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700262 if (page)
263 break;
264 }
265
266 if (!retry)
267 break;
268
269 if (signal_pending(current)) {
270 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
271 break;
272 }
273
274 schedule();
275 }
276 finish_wait(&drbd_pp_wait, &wait);
277
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200278 if (page)
279 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700280 return page;
281}
282
283/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100284 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200285 * Either links the page chain back to the global pool,
286 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200287static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700288{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200289 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700290 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200291
Philipp Reisner81a5d602011-02-22 19:53:16 -0500292 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200293 i = page_chain_free(page);
294 else {
295 struct page *tmp;
296 tmp = page_chain_tail(page, &i);
297 spin_lock(&drbd_pp_lock);
298 page_chain_add(&drbd_pp_pool, page, tmp);
299 drbd_pp_vacant += i;
300 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200302 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200303 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200304 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
305 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700306 wake_up(&drbd_pp_wait);
307}
308
309/*
310You need to hold the req_lock:
311 _drbd_wait_ee_list_empty()
312
313You must not have the req_lock:
314 drbd_free_ee()
315 drbd_alloc_ee()
316 drbd_init_ee()
317 drbd_release_ee()
318 drbd_ee_fix_bhs()
319 drbd_process_done_ee()
320 drbd_clear_done_ee()
321 drbd_wait_ee_list_empty()
322*/
323
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100324struct drbd_peer_request *
325drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
326 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700327{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100328 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700329 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200330 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700331
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100332 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700333 return NULL;
334
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100335 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
336 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700337 if (!(gfp_mask & __GFP_NOWARN))
338 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
339 return NULL;
340 }
341
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200342 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
343 if (!page)
344 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700345
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100346 drbd_clear_interval(&peer_req->i);
347 peer_req->i.size = data_size;
348 peer_req->i.sector = sector;
349 peer_req->i.local = false;
350 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100351
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100352 peer_req->epoch = NULL;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100353 peer_req->w.mdev = mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100354 peer_req->pages = page;
355 atomic_set(&peer_req->pending_bios, 0);
356 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100357 /*
358 * The block_id is opaque to the receiver. It is not endianness
359 * converted, and sent back to the sender unchanged.
360 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100361 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100363 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700364
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200365 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100366 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700367 return NULL;
368}
369
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100370void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100371 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700372{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100373 if (peer_req->flags & EE_HAS_DIGEST)
374 kfree(peer_req->digest);
375 drbd_pp_free(mdev, peer_req->pages, is_net);
376 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
377 D_ASSERT(drbd_interval_empty(&peer_req->i));
378 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700379}
380
381int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
382{
383 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100384 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700385 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200386 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700387
Philipp Reisner87eeee42011-01-19 14:16:30 +0100388 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100390 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100392 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
393 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700394 count++;
395 }
396 return count;
397}
398
399
Philipp Reisner32862ec2011-02-08 16:41:01 +0100400/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700401 * and receive_Barrier.
402 *
403 * Move entries from net_ee to done_ee, if ready.
404 * Grab done_ee, call all callbacks, free the entries.
405 * The callbacks typically send out ACKs.
406 */
407static int drbd_process_done_ee(struct drbd_conf *mdev)
408{
409 LIST_HEAD(work_list);
410 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100411 struct drbd_peer_request *peer_req, *t;
Philipp Reisner082a3432011-03-15 16:05:42 +0100412 int ok = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413
Philipp Reisner87eeee42011-01-19 14:16:30 +0100414 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700415 reclaim_net_ee(mdev, &reclaimed);
416 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100417 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100419 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
420 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700421
422 /* possible callbacks here:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +0100423 * e_end_block, and e_end_resync_block, e_send_discard_write.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700424 * all ignore the last argument.
425 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100426 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700427 /* list_del not necessary, next/prev members not touched */
Philipp Reisner00d56942011-02-09 18:09:48 +0100428 ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100429 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700430 }
431 wake_up(&mdev->ee_wait);
432
433 return ok;
434}
435
436void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
437{
438 DEFINE_WAIT(wait);
439
440 /* avoids spin_lock/unlock
441 * and calling prepare_to_wait in the fast path */
442 while (!list_empty(head)) {
443 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100444 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100445 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700446 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100447 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700448 }
449}
450
451void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
452{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100453 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700454 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100455 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700456}
457
458/* see also kernel_accept; which is only present since 2.6.18.
459 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100460static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700461{
462 struct sock *sk = sock->sk;
463 int err = 0;
464
465 *what = "listen";
466 err = sock->ops->listen(sock, 5);
467 if (err < 0)
468 goto out;
469
470 *what = "sock_create_lite";
471 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
472 newsock);
473 if (err < 0)
474 goto out;
475
476 *what = "accept";
477 err = sock->ops->accept(sock, *newsock, 0);
478 if (err < 0) {
479 sock_release(*newsock);
480 *newsock = NULL;
481 goto out;
482 }
483 (*newsock)->ops = sock->ops;
484
485out:
486 return err;
487}
488
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100489static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700490{
491 mm_segment_t oldfs;
492 struct kvec iov = {
493 .iov_base = buf,
494 .iov_len = size,
495 };
496 struct msghdr msg = {
497 .msg_iovlen = 1,
498 .msg_iov = (struct iovec *)&iov,
499 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
500 };
501 int rv;
502
503 oldfs = get_fs();
504 set_fs(KERNEL_DS);
505 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
506 set_fs(oldfs);
507
508 return rv;
509}
510
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100511static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700512{
513 mm_segment_t oldfs;
514 struct kvec iov = {
515 .iov_base = buf,
516 .iov_len = size,
517 };
518 struct msghdr msg = {
519 .msg_iovlen = 1,
520 .msg_iov = (struct iovec *)&iov,
521 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
522 };
523 int rv;
524
525 oldfs = get_fs();
526 set_fs(KERNEL_DS);
527
528 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100529 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700530 if (rv == size)
531 break;
532
533 /* Note:
534 * ECONNRESET other side closed the connection
535 * ERESTARTSYS (on sock) we got a signal
536 */
537
538 if (rv < 0) {
539 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100540 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700541 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100542 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700543 break;
544 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100545 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700546 break;
547 } else {
548 /* signal came in, or peer/link went down,
549 * after we read a partial message
550 */
551 /* D_ASSERT(signal_pending(current)); */
552 break;
553 }
554 };
555
556 set_fs(oldfs);
557
558 if (rv != size)
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100559 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700560
561 return rv;
562}
563
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200564/* quoting tcp(7):
565 * On individual connections, the socket buffer size must be set prior to the
566 * listen(2) or connect(2) calls in order to have it take effect.
567 * This is our wrapper to do so.
568 */
569static void drbd_setbufsize(struct socket *sock, unsigned int snd,
570 unsigned int rcv)
571{
572 /* open coded SO_SNDBUF, SO_RCVBUF */
573 if (snd) {
574 sock->sk->sk_sndbuf = snd;
575 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
576 }
577 if (rcv) {
578 sock->sk->sk_rcvbuf = rcv;
579 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
580 }
581}
582
/* Try to actively connect to the peer configured in tconn->net_conf.
 *
 * Returns the socket on which the connect call did not report an error,
 * or NULL if there is no net_conf or any step failed.
 *
 * On failure:
 *  - errors outside the "timeout / busy / signal / peer not available"
 *    set are logged together with the name of the failing step;
 *  - C_DISCONNECTING is requested only if such an error happened before
 *    the connect call itself (socket creation or bind).
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int my_addr_len;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	/* Clamp once and use the clamped length for both the copy and the
	 * bind: passing the unclamped length to bind would let it read
	 * beyond src_in6 if the configured length were larger. */
	my_addr_len = min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, tconn->net_conf->my_addr, my_addr_len);
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
660
Philipp Reisner76536202011-02-07 14:09:54 +0100661static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700662{
663 int timeo, err;
664 struct socket *s_estab = NULL, *s_listen;
665 const char *what;
666
Philipp Reisner76536202011-02-07 14:09:54 +0100667 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700668 return NULL;
669
670 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100671 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700672 SOCK_STREAM, IPPROTO_TCP, &s_listen);
673 if (err) {
674 s_listen = NULL;
675 goto out;
676 }
677
Philipp Reisner76536202011-02-07 14:09:54 +0100678 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700679 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
680
681 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
682 s_listen->sk->sk_rcvtimeo = timeo;
683 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100684 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
685 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700686
687 what = "bind before listen";
688 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100689 (struct sockaddr *) tconn->net_conf->my_addr,
690 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700691 if (err < 0)
692 goto out;
693
Philipp Reisner76536202011-02-07 14:09:54 +0100694 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700695
696out:
697 if (s_listen)
698 sock_release(s_listen);
699 if (err < 0) {
700 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100701 conn_err(tconn, "%s failed, err = %d\n", what, err);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100702 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700703 }
704 }
Philipp Reisner76536202011-02-07 14:09:54 +0100705 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706
707 return s_estab;
708}
709
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100710static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700711{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100712 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700713
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100714 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715}
716
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100717static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700718{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100719 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700720 int rr;
721
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100722 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700723
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100724 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700725 return be16_to_cpu(h->command);
726
727 return 0xffff;
728}
729
730/**
731 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700732 * @sock: pointer to the pointer to the socket.
733 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100734static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700735{
736 int rr;
737 char tb[4];
738
739 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100742 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700743
744 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100745 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700746 } else {
747 sock_release(*sock);
748 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100749 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700750 }
751}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100752/* Gets called if a connection is established, or if a new minor gets created
753 in a connection */
754int drbd_connected(int vnr, void *p, void *data)
Philipp Reisner907599e2011-02-08 11:25:37 +0100755{
756 struct drbd_conf *mdev = (struct drbd_conf *)p;
757 int ok = 1;
758
759 atomic_set(&mdev->packet_seq, 0);
760 mdev->peer_seq = 0;
761
Philipp Reisner8410da82011-02-11 20:11:10 +0100762 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
763 &mdev->tconn->cstate_mutex :
764 &mdev->own_state_mutex;
765
Philipp Reisner907599e2011-02-08 11:25:37 +0100766 ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
767 ok &= drbd_send_sizes(mdev, 0, 0);
768 ok &= drbd_send_uuids(mdev);
769 ok &= drbd_send_state(mdev);
770 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
771 clear_bit(RESIZE_PENDING, &mdev->flags);
772
Philipp Reisner8410da82011-02-11 20:11:10 +0100773
Philipp Reisner907599e2011-02-08 11:25:37 +0100774 return !ok;
775}
776
Philipp Reisnerb411b362009-09-25 16:07:19 -0700777/*
778 * return values:
779 * 1 yes, we have a valid connection
780 * 0 oops, did not work out, please try again
781 * -1 peer talks different language,
782 * no point in trying again, please go standalone.
783 * -2 We do not have a network config...
784 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100785static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700786{
787 struct socket *s, *sock, *msock;
788 int try, h, ok;
789
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100790 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700791 return -2;
792
Philipp Reisner907599e2011-02-08 11:25:37 +0100793 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
794 tconn->agreed_pro_version = 99;
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100795 /* agreed_pro_version must be smaller than 100 so we send the old
796 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700797
798 sock = NULL;
799 msock = NULL;
800
801 do {
802 for (try = 0;;) {
803 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100804 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700805 if (s || ++try >= 3)
806 break;
807 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100808 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700809 }
810
811 if (s) {
812 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100813 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700814 sock = s;
815 s = NULL;
816 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100817 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700818 msock = s;
819 s = NULL;
820 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100821 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700822 goto out_release_sockets;
823 }
824 }
825
826 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100827 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100828 ok = drbd_socket_okay(&sock);
829 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700830 if (ok)
831 break;
832 }
833
834retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100835 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700836 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100837 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100838 drbd_socket_okay(&sock);
839 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700840 switch (try) {
841 case P_HAND_SHAKE_S:
842 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100843 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700844 sock_release(sock);
845 }
846 sock = s;
847 break;
848 case P_HAND_SHAKE_M:
849 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100850 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700851 sock_release(msock);
852 }
853 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100854 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700855 break;
856 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100857 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700858 sock_release(s);
859 if (random32() & 1)
860 goto retry;
861 }
862 }
863
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100864 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700865 goto out_release_sockets;
866 if (signal_pending(current)) {
867 flush_signals(current);
868 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100869 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700870 goto out_release_sockets;
871 }
872
873 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100874 ok = drbd_socket_okay(&sock);
875 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700876 if (ok)
877 break;
878 }
879 } while (1);
880
881 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
882 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
883
884 sock->sk->sk_allocation = GFP_NOIO;
885 msock->sk->sk_allocation = GFP_NOIO;
886
887 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
888 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
889
Philipp Reisnerb411b362009-09-25 16:07:19 -0700890 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100891 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700892 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
893 * first set it to the P_HAND_SHAKE timeout,
894 * which we set to 4x the configured ping_timeout. */
895 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100896 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700897
Philipp Reisner907599e2011-02-08 11:25:37 +0100898 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
899 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700900
901 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300902 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700903 drbd_tcp_nodelay(sock);
904 drbd_tcp_nodelay(msock);
905
Philipp Reisner907599e2011-02-08 11:25:37 +0100906 tconn->data.socket = sock;
907 tconn->meta.socket = msock;
908 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700909
Philipp Reisner907599e2011-02-08 11:25:37 +0100910 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700911 if (h <= 0)
912 return h;
913
Philipp Reisner907599e2011-02-08 11:25:37 +0100914 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700915 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100916 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100917 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100918 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700919 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100920 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100921 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100922 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700923 }
924 }
925
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100926 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927 return 0;
928
Philipp Reisner907599e2011-02-08 11:25:37 +0100929 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700930 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
931
Philipp Reisner907599e2011-02-08 11:25:37 +0100932 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700933
Philipp Reisner907599e2011-02-08 11:25:37 +0100934 if (drbd_send_protocol(tconn) == -1)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200935 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936
Philipp Reisner907599e2011-02-08 11:25:37 +0100937 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700938
939out_release_sockets:
940 if (sock)
941 sock_release(sock);
942 if (msock)
943 sock_release(msock);
944 return -1;
945}
946
Philipp Reisnerce243852011-02-07 17:27:47 +0100947static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700948{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100949 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100950 pi->cmd = be16_to_cpu(h->h80.command);
951 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100952 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100953 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100954 pi->cmd = be16_to_cpu(h->h95.command);
955 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
956 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200957 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100958 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200959 be32_to_cpu(h->h80.magic),
960 be16_to_cpu(h->h80.command),
961 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100962 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700963 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100964 return true;
965}
966
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100967static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100968{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100969 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100970 int r;
971
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100972 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100973 if (unlikely(r != sizeof(*h))) {
974 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100975 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100976 return false;
977 }
978
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100979 r = decode_header(tconn, h, pi);
980 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981
Philipp Reisner257d0af2011-01-26 12:15:29 +0100982 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700983}
984
Philipp Reisner2451fc32010-08-24 13:43:11 +0200985static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986{
987 int rv;
988
989 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400990 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200991 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992 if (rv) {
993 dev_err(DEV, "local disk flush failed with status %d\n", rv);
994 /* would rather check on EOPNOTSUPP, but that is not reliable.
995 * don't try again for ANY return value != 0
996 * if (rv == -EOPNOTSUPP) */
997 drbd_bump_write_ordering(mdev, WO_drain_io);
998 }
999 put_ldev(mdev);
1000 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001001}
1002
1003/**
1004 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1005 * @mdev: DRBD device.
1006 * @epoch: Epoch object.
1007 * @ev: Epoch event.
1008 */
1009static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1010 struct drbd_epoch *epoch,
1011 enum epoch_event ev)
1012{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001013 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015 enum finish_epoch rv = FE_STILL_LIVE;
1016
1017 spin_lock(&mdev->epoch_lock);
1018 do {
1019 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001020
1021 epoch_size = atomic_read(&epoch->epoch_size);
1022
1023 switch (ev & ~EV_CLEANUP) {
1024 case EV_PUT:
1025 atomic_dec(&epoch->active);
1026 break;
1027 case EV_GOT_BARRIER_NR:
1028 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 break;
1030 case EV_BECAME_LAST:
1031 /* nothing to do*/
1032 break;
1033 }
1034
Philipp Reisnerb411b362009-09-25 16:07:19 -07001035 if (epoch_size != 0 &&
1036 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001037 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001038 if (!(ev & EV_CLEANUP)) {
1039 spin_unlock(&mdev->epoch_lock);
1040 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1041 spin_lock(&mdev->epoch_lock);
1042 }
1043 dec_unacked(mdev);
1044
1045 if (mdev->current_epoch != epoch) {
1046 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1047 list_del(&epoch->list);
1048 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1049 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001050 kfree(epoch);
1051
1052 if (rv == FE_STILL_LIVE)
1053 rv = FE_DESTROYED;
1054 } else {
1055 epoch->flags = 0;
1056 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001057 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001058 if (rv == FE_STILL_LIVE)
1059 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001060 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001061 }
1062 }
1063
1064 if (!next_epoch)
1065 break;
1066
1067 epoch = next_epoch;
1068 } while (1);
1069
1070 spin_unlock(&mdev->epoch_lock);
1071
Philipp Reisnerb411b362009-09-25 16:07:19 -07001072 return rv;
1073}
1074
1075/**
1076 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1077 * @mdev: DRBD device.
1078 * @wo: Write ordering method to try.
1079 */
1080void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1081{
1082 enum write_ordering_e pwo;
1083 static char *write_ordering_str[] = {
1084 [WO_none] = "none",
1085 [WO_drain_io] = "drain",
1086 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001087 };
1088
1089 pwo = mdev->write_ordering;
1090 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1092 wo = WO_drain_io;
1093 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1094 wo = WO_none;
1095 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001096 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1098}
1099
1100/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001101 * drbd_submit_peer_request()
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001102 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001103 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001104 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001105 *
1106 * May spread the pages to multiple bios,
1107 * depending on bio_add_page restrictions.
1108 *
1109 * Returns 0 if all bios have been submitted,
1110 * -ENOMEM if we could not allocate enough bios,
1111 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1112 * single page to an empty bio (which should never happen and likely indicates
1113 * that the lower level IO stack is in some way broken). This has been observed
1114 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001115 */
1116/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001117int drbd_submit_peer_request(struct drbd_conf *mdev,
1118 struct drbd_peer_request *peer_req,
1119 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001120{
1121 struct bio *bios = NULL;
1122 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001123 struct page *page = peer_req->pages;
1124 sector_t sector = peer_req->i.sector;
1125 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001126 unsigned n_bios = 0;
1127 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001128 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001129
1130 /* In most cases, we will only need one bio. But in case the lower
1131 * level restrictions happen to be different at this offset on this
1132 * side than those of the sending peer, we may need to submit the
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001133 * request in more than one bio.
1134 *
1135 * Plain bio_alloc is good enough here, this is no DRBD internally
1136 * generated bio, but a bio allocated on behalf of the peer.
1137 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001138next_bio:
1139 bio = bio_alloc(GFP_NOIO, nr_pages);
1140 if (!bio) {
1141 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1142 goto fail;
1143 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001144 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001145 bio->bi_sector = sector;
1146 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001147 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001148 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001149 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001150
1151 bio->bi_next = bios;
1152 bios = bio;
1153 ++n_bios;
1154
1155 page_chain_for_each(page) {
1156 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1157 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001158 /* A single page must always be possible!
1159 * But in case it fails anyways,
1160 * we deal with it, and complain (below). */
1161 if (bio->bi_vcnt == 0) {
1162 dev_err(DEV,
1163 "bio_add_page failed for len=%u, "
1164 "bi_vcnt=0 (bi_sector=%llu)\n",
1165 len, (unsigned long long)bio->bi_sector);
1166 err = -ENOSPC;
1167 goto fail;
1168 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001169 goto next_bio;
1170 }
1171 ds -= len;
1172 sector += len >> 9;
1173 --nr_pages;
1174 }
1175 D_ASSERT(page == NULL);
1176 D_ASSERT(ds == 0);
1177
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001178 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001179 do {
1180 bio = bios;
1181 bios = bios->bi_next;
1182 bio->bi_next = NULL;
1183
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001184 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001185 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001186 return 0;
1187
1188fail:
1189 while (bios) {
1190 bio = bios;
1191 bios = bios->bi_next;
1192 bio_put(bio);
1193 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001194 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001195}
1196
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001197static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001198 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001199{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001200 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001201
1202 drbd_remove_interval(&mdev->write_requests, i);
1203 drbd_clear_interval(i);
1204
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001205 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001206 if (i->waiting)
1207 wake_up(&mdev->misc_wait);
1208}
1209
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001210static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1211 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001213 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001214 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215 struct drbd_epoch *epoch;
1216
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217 inc_unacked(mdev);
1218
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 mdev->current_epoch->barrier_nr = p->barrier;
1220 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1221
1222 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1223 * the activity log, which means it would not be resynced in case the
1224 * R_PRIMARY crashes now.
1225 * Therefore we must send the barrier_ack after the barrier request was
1226 * completed. */
1227 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 case WO_none:
1229 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001230 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001231
1232 /* receiver context, in the writeout path of the other node.
1233 * avoid potential distributed deadlock */
1234 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1235 if (epoch)
1236 break;
1237 else
1238 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1239 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240
1241 case WO_bdev_flush:
1242 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001244 drbd_flush(mdev);
1245
1246 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1247 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1248 if (epoch)
1249 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001250 }
1251
Philipp Reisner2451fc32010-08-24 13:43:11 +02001252 epoch = mdev->current_epoch;
1253 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1254
1255 D_ASSERT(atomic_read(&epoch->active) == 0);
1256 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001258 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001259 default:
1260 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001261 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 }
1263
1264 epoch->flags = 0;
1265 atomic_set(&epoch->epoch_size, 0);
1266 atomic_set(&epoch->active, 0);
1267
1268 spin_lock(&mdev->epoch_lock);
1269 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1270 list_add(&epoch->list, &mdev->current_epoch->list);
1271 mdev->current_epoch = epoch;
1272 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001273 } else {
1274 /* The current_epoch got recycled while we allocated this one... */
1275 kfree(epoch);
1276 }
1277 spin_unlock(&mdev->epoch_lock);
1278
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001279 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001280}
1281
1282/* used from receive_RSDataReply (recv_resync_read)
1283 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001284static struct drbd_peer_request *
1285read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1286 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001287{
Lars Ellenberg66660322010-04-06 12:15:04 +02001288 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001289 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001291 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001292 void *dig_in = mdev->tconn->int_dig_in;
1293 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001294 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295
Philipp Reisnera0638452011-01-19 14:31:32 +01001296 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1297 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001298
1299 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001300 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001301 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001302 if (!signal_pending(current))
1303 dev_warn(DEV,
1304 "short read receiving data digest: read %d expected %d\n",
1305 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001306 return NULL;
1307 }
1308 }
1309
1310 data_size -= dgs;
1311
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001312 if (!expect(data_size != 0))
1313 return NULL;
1314 if (!expect(IS_ALIGNED(data_size, 512)))
1315 return NULL;
1316 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1317 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001318
Lars Ellenberg66660322010-04-06 12:15:04 +02001319 /* even though we trust out peer,
1320 * we sometimes have to double check. */
1321 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001322 dev_err(DEV, "request from peer beyond end of local disk: "
1323 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001324 (unsigned long long)capacity,
1325 (unsigned long long)sector, data_size);
1326 return NULL;
1327 }
1328
Philipp Reisnerb411b362009-09-25 16:07:19 -07001329 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1330 * "criss-cross" setup, that might cause write-out on some other DRBD,
1331 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001332 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1333 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001334 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001335
Philipp Reisnerb411b362009-09-25 16:07:19 -07001336 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001337 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001338 page_chain_for_each(page) {
1339 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001340 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001341 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001342 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001343 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1344 data[0] = data[0] ^ (unsigned long)-1;
1345 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001346 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001347 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001348 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001349 if (!signal_pending(current))
1350 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1351 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001352 return NULL;
1353 }
1354 ds -= rr;
1355 }
1356
1357 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001358 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001359 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001360 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1361 (unsigned long long)sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001362 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001363 return NULL;
1364 }
1365 }
1366 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001367 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001368}
1369
1370/* drbd_drain_block() just takes a data block
1371 * out of the socket input buffer, and discards it.
1372 */
1373static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1374{
1375 struct page *page;
1376 int rr, rv = 1;
1377 void *data;
1378
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001379 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001380 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001381
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001382 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001383
1384 data = kmap(page);
1385 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001386 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1388 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001389 if (!signal_pending(current))
1390 dev_warn(DEV,
1391 "short read receiving data: read %d expected %d\n",
1392 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001393 break;
1394 }
1395 data_size -= rr;
1396 }
1397 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001398 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001399 return rv;
1400}
1401
1402static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1403 sector_t sector, int data_size)
1404{
1405 struct bio_vec *bvec;
1406 struct bio *bio;
1407 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001408 void *dig_in = mdev->tconn->int_dig_in;
1409 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001410
Philipp Reisnera0638452011-01-19 14:31:32 +01001411 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1412 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001413
1414 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001415 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001416 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001417 if (!signal_pending(current))
1418 dev_warn(DEV,
1419 "short read receiving data reply digest: read %d expected %d\n",
1420 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001421 return 0;
1422 }
1423 }
1424
1425 data_size -= dgs;
1426
1427 /* optimistically update recv_cnt. if receiving fails below,
1428 * we disconnect anyways, and counters will be reset. */
1429 mdev->recv_cnt += data_size>>9;
1430
1431 bio = req->master_bio;
1432 D_ASSERT(sector == bio->bi_sector);
1433
1434 bio_for_each_segment(bvec, bio, i) {
1435 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001436 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 kmap(bvec->bv_page)+bvec->bv_offset,
1438 expect);
1439 kunmap(bvec->bv_page);
1440 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001441 if (!signal_pending(current))
1442 dev_warn(DEV, "short read receiving data reply: "
1443 "read %d expected %d\n",
1444 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001445 return 0;
1446 }
1447 data_size -= rr;
1448 }
1449
1450 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001451 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001452 if (memcmp(dig_in, dig_vv, dgs)) {
1453 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1454 return 0;
1455 }
1456 }
1457
1458 D_ASSERT(data_size == 0);
1459 return 1;
1460}
1461
1462/* e_end_resync_block() is called via
1463 * drbd_process_done_ee() by asender only */
Philipp Reisner00d56942011-02-09 18:09:48 +01001464static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001466 struct drbd_peer_request *peer_req =
1467 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001468 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001469 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470 int ok;
1471
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001472 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001474 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1475 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1476 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477 } else {
1478 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001479 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001481 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482 }
1483 dec_unacked(mdev);
1484
1485 return ok;
1486}
1487
1488static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1489{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001490 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001492 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1493 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001494 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495
1496 dec_rs_pending(mdev);
1497
Philipp Reisnerb411b362009-09-25 16:07:19 -07001498 inc_unacked(mdev);
1499 /* corresponding dec_unacked() in e_end_resync_block()
1500 * respective _drbd_clear_done_ee */
1501
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001502 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001503
Philipp Reisner87eeee42011-01-19 14:16:30 +01001504 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001505 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001506 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001507
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001508 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001509 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001510 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001512 /* don't care for the reason here */
1513 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001514 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001515 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001516 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001517
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001518 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001519fail:
1520 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001521 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001522}
1523
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001524static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001525find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1526 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001527{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001528 struct drbd_request *req;
1529
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001530 /* Request object according to our peer */
1531 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001532 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001533 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001534 if (!missing_ok) {
1535 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1536 (unsigned long)id, (unsigned long long)sector);
1537 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001538 return NULL;
1539}
1540
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001541static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1542 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543{
1544 struct drbd_request *req;
1545 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001547 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548
1549 sector = be64_to_cpu(p->sector);
1550
Philipp Reisner87eeee42011-01-19 14:16:30 +01001551 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001552 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001553 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001554 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001555 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001556
Bart Van Assche24c48302011-05-21 18:32:29 +02001557 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558 * special casing it there for the various failure cases.
1559 * still no race with drbd_fail_pending_reads */
1560 ok = recv_dless_read(mdev, req, sector, data_size);
1561
1562 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001563 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564 /* else: nothing. handled from drbd_disconnect...
1565 * I don't think we may complete this just yet
1566 * in case we are "on-disconnect: freeze" */
1567
1568 return ok;
1569}
1570
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001571static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1572 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573{
1574 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001576 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577
1578 sector = be64_to_cpu(p->sector);
1579 D_ASSERT(p->block_id == ID_SYNCER);
1580
1581 if (get_ldev(mdev)) {
1582 /* data is submitted to disk within recv_resync_read.
1583 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001584 * or in drbd_peer_request_endio. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001585 ok = recv_resync_read(mdev, sector, data_size);
1586 } else {
1587 if (__ratelimit(&drbd_ratelimit_state))
1588 dev_err(DEV, "Can not write resync data to local disk.\n");
1589
1590 ok = drbd_drain_block(mdev, data_size);
1591
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001592 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001593 }
1594
Philipp Reisner778f2712010-07-06 11:14:00 +02001595 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1596
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597 return ok;
1598}
1599
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001600static int w_restart_write(struct drbd_work *w, int cancel)
1601{
1602 struct drbd_request *req = container_of(w, struct drbd_request, w);
1603 struct drbd_conf *mdev = w->mdev;
1604 struct bio *bio;
1605 unsigned long start_time;
1606 unsigned long flags;
1607
1608 spin_lock_irqsave(&mdev->tconn->req_lock, flags);
1609 if (!expect(req->rq_state & RQ_POSTPONED)) {
1610 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1611 return 0;
1612 }
1613 bio = req->master_bio;
1614 start_time = req->start_time;
1615 /* Postponed requests will not have their master_bio completed! */
1616 __req_mod(req, DISCARD_WRITE, NULL);
1617 spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
1618
1619 while (__drbd_make_request(mdev, bio, start_time))
1620 /* retry */ ;
1621 return 1;
1622}
1623
1624static void restart_conflicting_writes(struct drbd_conf *mdev,
1625 sector_t sector, int size)
1626{
1627 struct drbd_interval *i;
1628 struct drbd_request *req;
1629
1630 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1631 if (!i->local)
1632 continue;
1633 req = container_of(i, struct drbd_request, i);
1634 if (req->rq_state & RQ_LOCAL_PENDING ||
1635 !(req->rq_state & RQ_POSTPONED))
1636 continue;
1637 if (expect(list_empty(&req->w.list))) {
1638 req->w.mdev = mdev;
1639 req->w.cb = w_restart_write;
1640 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1641 }
1642 }
1643}
1644
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645/* e_end_block() is called via drbd_process_done_ee().
1646 * this means this function only runs in the asender thread
1647 */
Philipp Reisner00d56942011-02-09 18:09:48 +01001648static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001650 struct drbd_peer_request *peer_req =
1651 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001652 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001653 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001654 int ok = 1, pcmd;
1655
Philipp Reisner89e58e72011-01-19 13:12:45 +01001656 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001657 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001658 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1659 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001660 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001662 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001663 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001664 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001666 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001667 /* we expect it to be marked out of sync anyways...
1668 * maybe assert this? */
1669 }
1670 dec_unacked(mdev);
1671 }
1672 /* we delete from the conflict detection hash _after_ we sent out the
1673 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001674 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001675 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001676 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1677 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001678 if (peer_req->flags & EE_RESTART_REQUESTS)
1679 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001680 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001681 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001682 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001684 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685
1686 return ok;
1687}
1688
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001689static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001691 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001692 struct drbd_peer_request *peer_req =
1693 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001694 int ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001696 ok = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001697 dec_unacked(mdev);
1698
1699 return ok;
1700}
1701
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001702static int e_send_discard_write(struct drbd_work *w, int unused)
1703{
1704 return e_send_ack(w, P_DISCARD_WRITE);
1705}
1706
1707static int e_send_retry_write(struct drbd_work *w, int unused)
1708{
1709 struct drbd_tconn *tconn = w->mdev->tconn;
1710
1711 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1712 P_RETRY_WRITE : P_DISCARD_WRITE);
1713}
1714
/*
 * seq_greater() - wrap-around aware "a is newer than b" for sequence numbers.
 *
 * Returns true if @a is ahead of @b by 1 .. 2^31-1 steps (modulo 2^32).
 *
 * The difference is computed in unsigned arithmetic, where wrap-around is
 * well defined, and only then reinterpreted as signed.  Subtracting two
 * values that were already converted to s32 would overflow for operands that
 * are far apart, which is undefined behaviour in ISO C.
 */
static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 *  a <<= 8; b <<= 8;
	 */
	return (s32)(a - b) > 0;
}
1724
1725static u32 seq_max(u32 a, u32 b)
1726{
1727 return seq_greater(a, b) ? a : b;
1728}
1729
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001730static bool need_peer_seq(struct drbd_conf *mdev)
1731{
1732 struct drbd_tconn *tconn = mdev->tconn;
1733
1734 /*
1735 * We only need to keep track of the last packet_seq number of our peer
1736 * if we are in dual-primary mode and we have the discard flag set; see
1737 * handle_write_conflicts().
1738 */
1739 return tconn->net_conf->two_primaries &&
1740 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1741}
1742
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001743static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001744{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001745 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001746
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001747 if (need_peer_seq(mdev)) {
1748 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001749 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1750 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001751 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001752 /* wake up only if we actually changed mdev->peer_seq */
1753 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001754 wake_up(&mdev->seq_wait);
1755 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001756}
1757
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758/* Called from receive_Data.
1759 * Synchronize packets on sock with packets on msock.
1760 *
1761 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1762 * packet traveling on msock, they are still processed in the order they have
1763 * been sent.
1764 *
1765 * Note: we don't care for Ack packets overtaking P_DATA packets.
1766 *
1767 * In case packet_seq is larger than mdev->peer_seq number, there are
1768 * outstanding packets on the msock. We wait for them to arrive.
1769 * In case we are the logically next packet, we update mdev->peer_seq
1770 * ourselves. Correctly handles 32bit wrap around.
1771 *
1772 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1773 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1774 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1775 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1776 *
1777 * returns 0 if we may process the packet,
1778 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001779static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780{
1781 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001783 int ret;
1784
1785 if (!need_peer_seq(mdev))
1786 return 0;
1787
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 spin_lock(&mdev->peer_seq_lock);
1789 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001790 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1791 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1792 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001794 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 if (signal_pending(current)) {
1796 ret = -ERESTARTSYS;
1797 break;
1798 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001799 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001801 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1802 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001804 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001806 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001807 break;
1808 }
1809 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001810 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001811 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001812 return ret;
1813}
1814
Lars Ellenberg688593c2010-11-17 22:25:03 +01001815/* see also bio_flags_to_wire()
1816 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1817 * flags and back. We may replicate to other kernel versions. */
1818static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001819{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001820 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1821 (dpf & DP_FUA ? REQ_FUA : 0) |
1822 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1823 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001824}
1825
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001826static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1827 unsigned int size)
1828{
1829 struct drbd_interval *i;
1830
1831 repeat:
1832 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1833 struct drbd_request *req;
1834 struct bio_and_error m;
1835
1836 if (!i->local)
1837 continue;
1838 req = container_of(i, struct drbd_request, i);
1839 if (!(req->rq_state & RQ_POSTPONED))
1840 continue;
1841 req->rq_state &= ~RQ_POSTPONED;
1842 __req_mod(req, NEG_ACKED, &m);
1843 spin_unlock_irq(&mdev->tconn->req_lock);
1844 if (m.bio)
1845 complete_master_bio(mdev, &m);
1846 spin_lock_irq(&mdev->tconn->req_lock);
1847 goto repeat;
1848 }
1849}
1850
1851static int handle_write_conflicts(struct drbd_conf *mdev,
1852 struct drbd_peer_request *peer_req)
1853{
1854 struct drbd_tconn *tconn = mdev->tconn;
1855 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
1856 sector_t sector = peer_req->i.sector;
1857 const unsigned int size = peer_req->i.size;
1858 struct drbd_interval *i;
1859 bool equal;
1860 int err;
1861
1862 /*
1863 * Inserting the peer request into the write_requests tree will prevent
1864 * new conflicting local requests from being added.
1865 */
1866 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1867
1868 repeat:
1869 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1870 if (i == &peer_req->i)
1871 continue;
1872
1873 if (!i->local) {
1874 /*
1875 * Our peer has sent a conflicting remote request; this
1876 * should not happen in a two-node setup. Wait for the
1877 * earlier peer request to complete.
1878 */
1879 err = drbd_wait_misc(mdev, i);
1880 if (err)
1881 goto out;
1882 goto repeat;
1883 }
1884
1885 equal = i->sector == sector && i->size == size;
1886 if (resolve_conflicts) {
1887 /*
1888 * If the peer request is fully contained within the
1889 * overlapping request, it can be discarded; otherwise,
1890 * it will be retried once all overlapping requests
1891 * have completed.
1892 */
1893 bool discard = i->sector <= sector && i->sector +
1894 (i->size >> 9) >= sector + (size >> 9);
1895
1896 if (!equal)
1897 dev_alert(DEV, "Concurrent writes detected: "
1898 "local=%llus +%u, remote=%llus +%u, "
1899 "assuming %s came first\n",
1900 (unsigned long long)i->sector, i->size,
1901 (unsigned long long)sector, size,
1902 discard ? "local" : "remote");
1903
1904 inc_unacked(mdev);
1905 peer_req->w.cb = discard ? e_send_discard_write :
1906 e_send_retry_write;
1907 list_add_tail(&peer_req->w.list, &mdev->done_ee);
1908 wake_asender(mdev->tconn);
1909
1910 err = -ENOENT;
1911 goto out;
1912 } else {
1913 struct drbd_request *req =
1914 container_of(i, struct drbd_request, i);
1915
1916 if (!equal)
1917 dev_alert(DEV, "Concurrent writes detected: "
1918 "local=%llus +%u, remote=%llus +%u\n",
1919 (unsigned long long)i->sector, i->size,
1920 (unsigned long long)sector, size);
1921
1922 if (req->rq_state & RQ_LOCAL_PENDING ||
1923 !(req->rq_state & RQ_POSTPONED)) {
1924 /*
1925 * Wait for the node with the discard flag to
1926 * decide if this request will be discarded or
1927 * retried. Requests that are discarded will
1928 * disappear from the write_requests tree.
1929 *
1930 * In addition, wait for the conflicting
1931 * request to finish locally before submitting
1932 * the conflicting peer request.
1933 */
1934 err = drbd_wait_misc(mdev, &req->i);
1935 if (err) {
1936 _conn_request_state(mdev->tconn,
1937 NS(conn, C_TIMEOUT),
1938 CS_HARD);
1939 fail_postponed_requests(mdev, sector, size);
1940 goto out;
1941 }
1942 goto repeat;
1943 }
1944 /*
1945 * Remember to restart the conflicting requests after
1946 * the new peer request has completed.
1947 */
1948 peer_req->flags |= EE_RESTART_REQUESTS;
1949 }
1950 }
1951 err = 0;
1952
1953 out:
1954 if (err)
1955 drbd_remove_epoch_entry_interval(mdev, peer_req);
1956 return err;
1957}
1958
Philipp Reisnerb411b362009-09-25 16:07:19 -07001959/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001960static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1961 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001962{
1963 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001964 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001965 struct p_data *p = &mdev->tconn->data.rbuf.data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001966 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001967 int rw = WRITE;
1968 u32 dp_flags;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001969 int err;
1970
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 if (!get_ldev(mdev)) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001973 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001974 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001975 atomic_inc(&mdev->current_epoch->epoch_size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001976 return drbd_drain_block(mdev, data_size) && err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001977 }
1978
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001979 /*
1980 * Corresponding put_ldev done either below (on various errors), or in
1981 * drbd_peer_request_endio, if we successfully submit the data at the
1982 * end of this function.
1983 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001984
1985 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001986 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1987 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001988 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001989 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001990 }
1991
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001992 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001993
Lars Ellenberg688593c2010-11-17 22:25:03 +01001994 dp_flags = be32_to_cpu(p->dp_flags);
1995 rw |= wire_flags_to_bio(mdev, dp_flags);
1996
1997 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001998 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001999
Philipp Reisnerb411b362009-09-25 16:07:19 -07002000 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002001 peer_req->epoch = mdev->current_epoch;
2002 atomic_inc(&peer_req->epoch->epoch_size);
2003 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002004 spin_unlock(&mdev->epoch_lock);
2005
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002006 if (mdev->tconn->net_conf->two_primaries) {
2007 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2008 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002009 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002010 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002011 err = handle_write_conflicts(mdev, peer_req);
2012 if (err) {
2013 spin_unlock_irq(&mdev->tconn->req_lock);
2014 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002015 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002016 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002017 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002018 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002020 } else
2021 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002022 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002023 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024
Philipp Reisner89e58e72011-01-19 13:12:45 +01002025 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 case DRBD_PROT_C:
2027 inc_unacked(mdev);
2028 /* corresponding dec_unacked() in e_end_block()
2029 * respective _drbd_clear_done_ee */
2030 break;
2031 case DRBD_PROT_B:
2032 /* I really don't like it that the receiver thread
2033 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002034 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035 break;
2036 case DRBD_PROT_A:
2037 /* nothing to do */
2038 break;
2039 }
2040
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002041 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002043 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2044 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2045 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
2046 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002047 }
2048
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002049 if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002050 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002051
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002052 /* don't care for the reason here */
2053 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002054 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002055 list_del(&peer_req->w.list);
2056 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002057 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002058 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
2059 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002060
Philipp Reisnerb411b362009-09-25 16:07:19 -07002061out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002062 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002063 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002064 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002065 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066}
2067
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002068/* We may throttle resync, if the lower device seems to be busy,
2069 * and current sync rate is above c_min_rate.
2070 *
2071 * To decide whether or not the lower device is busy, we use a scheme similar
2072 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2073 * (more than 64 sectors) of activity we cannot account for with our own resync
2074 * activity, it obviously is "busy".
2075 *
2076 * The current sync rate used here uses only the most recent two step marks,
2077 * to have a short time average so we can react faster.
2078 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002079int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002080{
2081 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2082 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002083 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002084 int curr_events;
2085 int throttle = 0;
2086
2087 /* feature disabled? */
2088 if (mdev->sync_conf.c_min_rate == 0)
2089 return 0;
2090
Philipp Reisnere3555d82010-11-07 15:56:29 +01002091 spin_lock_irq(&mdev->al_lock);
2092 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2093 if (tmp) {
2094 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2095 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2096 spin_unlock_irq(&mdev->al_lock);
2097 return 0;
2098 }
2099 /* Do not slow down if app IO is already waiting for this extent */
2100 }
2101 spin_unlock_irq(&mdev->al_lock);
2102
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002103 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2104 (int)part_stat_read(&disk->part0, sectors[1]) -
2105 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002106
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002107 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2108 unsigned long rs_left;
2109 int i;
2110
2111 mdev->rs_last_events = curr_events;
2112
2113 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2114 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002115 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2116
2117 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2118 rs_left = mdev->ov_left;
2119 else
2120 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002121
2122 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2123 if (!dt)
2124 dt++;
2125 db = mdev->rs_mark_left[i] - rs_left;
2126 dbdt = Bit2KB(db/dt);
2127
2128 if (dbdt > mdev->sync_conf.c_min_rate)
2129 throttle = 1;
2130 }
2131 return throttle;
2132}
2133
2134
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002135static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2136 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002137{
2138 sector_t sector;
2139 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002140 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002141 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002142 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002143 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002144 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145
2146 sector = be64_to_cpu(p->sector);
2147 size = be32_to_cpu(p->blksize);
2148
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002149 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002150 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2151 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002152 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153 }
2154 if (sector + (size>>9) > capacity) {
2155 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2156 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002157 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002158 }
2159
2160 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002161 verb = 1;
2162 switch (cmd) {
2163 case P_DATA_REQUEST:
2164 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2165 break;
2166 case P_RS_DATA_REQUEST:
2167 case P_CSUM_RS_REQUEST:
2168 case P_OV_REQUEST:
2169 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2170 break;
2171 case P_OV_REPLY:
2172 verb = 0;
2173 dec_rs_pending(mdev);
2174 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2175 break;
2176 default:
2177 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2178 cmdname(cmd));
2179 }
2180 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181 dev_err(DEV, "Can not satisfy peer's read request, "
2182 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002183
Lars Ellenberga821cc42010-09-06 12:31:37 +02002184 /* drain possibly payload */
2185 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002186 }
2187
2188 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2189 * "criss-cross" setup, that might cause write-out on some other DRBD,
2190 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002191 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2192 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002194 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195 }
2196
Philipp Reisner02918be2010-08-20 14:35:10 +02002197 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002199 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002200 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002201 /* application IO, don't drbd_rs_begin_io */
2202 goto submit;
2203
Philipp Reisnerb411b362009-09-25 16:07:19 -07002204 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002205 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002207 /* used in the sector offset progress display */
2208 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002209 break;
2210
2211 case P_OV_REPLY:
2212 case P_CSUM_RS_REQUEST:
2213 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002214 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2215 if (!di)
2216 goto out_free_e;
2217
2218 di->digest_size = digest_size;
2219 di->digest = (((char *)di)+sizeof(struct digest_info));
2220
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002221 peer_req->digest = di;
2222 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002223
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002224 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225 goto out_free_e;
2226
Philipp Reisner02918be2010-08-20 14:35:10 +02002227 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002228 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002229 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002230 /* used in the sector offset progress display */
2231 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002232 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002233 /* track progress, we may need to throttle */
2234 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002235 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002236 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002237 /* drbd_rs_begin_io done when we sent this request,
2238 * but accounting still needs to be done. */
2239 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240 }
2241 break;
2242
2243 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002245 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002246 unsigned long now = jiffies;
2247 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248 mdev->ov_start_sector = sector;
2249 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002250 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2251 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002252 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2253 mdev->rs_mark_left[i] = mdev->ov_left;
2254 mdev->rs_mark_time[i] = now;
2255 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002256 dev_info(DEV, "Online Verify start sector: %llu\n",
2257 (unsigned long long)sector);
2258 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002259 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002260 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002261 break;
2262
Philipp Reisnerb411b362009-09-25 16:07:19 -07002263 default:
2264 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002265 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002266 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002267 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002268 }
2269
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002270 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2271 * wrt the receiver, but it is not as straightforward as it may seem.
2272 * Various places in the resync start and stop logic assume resync
2273 * requests are processed in order, requeuing this on the worker thread
2274 * introduces a bunch of new code for synchronization between threads.
2275 *
2276 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2277 * "forever", throttling after drbd_rs_begin_io will lock that extent
2278 * for application writes for the same time. For now, just throttle
2279 * here, where the rest of the code expects the receiver to sleep for
2280 * a while, anyways.
2281 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002283 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2284 * this defers syncer requests for some time, before letting at least
2285 * on request through. The resync controller on the receiving side
2286 * will adapt to the incoming rate accordingly.
2287 *
2288 * We cannot throttle here if remote is Primary/SyncTarget:
2289 * we would also throttle its application reads.
2290 * In that case, throttling is done on the SyncTarget only.
2291 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002292 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2293 schedule_timeout_uninterruptible(HZ/10);
2294 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002295 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002296
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002297submit_for_resync:
2298 atomic_add(size >> 9, &mdev->rs_sect_ev);
2299
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002300submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002301 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002302 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002303 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002304 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002305
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002306 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002307 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002308
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002309 /* don't care for the reason here */
2310 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002311 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002312 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002313 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002314 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2315
Philipp Reisnerb411b362009-09-25 16:07:19 -07002316out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002318 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002319 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002320}
2321
2322static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2323{
2324 int self, peer, rv = -100;
2325 unsigned long ch_self, ch_peer;
2326
2327 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2328 peer = mdev->p_uuid[UI_BITMAP] & 1;
2329
2330 ch_peer = mdev->p_uuid[UI_SIZE];
2331 ch_self = mdev->comm_bm_set;
2332
Philipp Reisner89e58e72011-01-19 13:12:45 +01002333 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002334 case ASB_CONSENSUS:
2335 case ASB_DISCARD_SECONDARY:
2336 case ASB_CALL_HELPER:
2337 dev_err(DEV, "Configuration error.\n");
2338 break;
2339 case ASB_DISCONNECT:
2340 break;
2341 case ASB_DISCARD_YOUNGER_PRI:
2342 if (self == 0 && peer == 1) {
2343 rv = -1;
2344 break;
2345 }
2346 if (self == 1 && peer == 0) {
2347 rv = 1;
2348 break;
2349 }
2350 /* Else fall through to one of the other strategies... */
2351 case ASB_DISCARD_OLDER_PRI:
2352 if (self == 0 && peer == 1) {
2353 rv = 1;
2354 break;
2355 }
2356 if (self == 1 && peer == 0) {
2357 rv = -1;
2358 break;
2359 }
2360 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002361 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002362 "Using discard-least-changes instead\n");
2363 case ASB_DISCARD_ZERO_CHG:
2364 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002365 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002366 ? -1 : 1;
2367 break;
2368 } else {
2369 if (ch_peer == 0) { rv = 1; break; }
2370 if (ch_self == 0) { rv = -1; break; }
2371 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002372 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002373 break;
2374 case ASB_DISCARD_LEAST_CHG:
2375 if (ch_self < ch_peer)
2376 rv = -1;
2377 else if (ch_self > ch_peer)
2378 rv = 1;
2379 else /* ( ch_self == ch_peer ) */
2380 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002381 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002382 ? -1 : 1;
2383 break;
2384 case ASB_DISCARD_LOCAL:
2385 rv = -1;
2386 break;
2387 case ASB_DISCARD_REMOTE:
2388 rv = 1;
2389 }
2390
2391 return rv;
2392}
2393
2394static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2395{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002396 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002397
Philipp Reisner89e58e72011-01-19 13:12:45 +01002398 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002399 case ASB_DISCARD_YOUNGER_PRI:
2400 case ASB_DISCARD_OLDER_PRI:
2401 case ASB_DISCARD_LEAST_CHG:
2402 case ASB_DISCARD_LOCAL:
2403 case ASB_DISCARD_REMOTE:
2404 dev_err(DEV, "Configuration error.\n");
2405 break;
2406 case ASB_DISCONNECT:
2407 break;
2408 case ASB_CONSENSUS:
2409 hg = drbd_asb_recover_0p(mdev);
2410 if (hg == -1 && mdev->state.role == R_SECONDARY)
2411 rv = hg;
2412 if (hg == 1 && mdev->state.role == R_PRIMARY)
2413 rv = hg;
2414 break;
2415 case ASB_VIOLENTLY:
2416 rv = drbd_asb_recover_0p(mdev);
2417 break;
2418 case ASB_DISCARD_SECONDARY:
2419 return mdev->state.role == R_PRIMARY ? 1 : -1;
2420 case ASB_CALL_HELPER:
2421 hg = drbd_asb_recover_0p(mdev);
2422 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002423 enum drbd_state_rv rv2;
2424
2425 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002426 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2427 * we might be here in C_WF_REPORT_PARAMS which is transient.
2428 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002429 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2430 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002431 drbd_khelper(mdev, "pri-lost-after-sb");
2432 } else {
2433 dev_warn(DEV, "Successfully gave up primary role.\n");
2434 rv = hg;
2435 }
2436 } else
2437 rv = hg;
2438 }
2439
2440 return rv;
2441}
2442
2443static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2444{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002445 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002446
Philipp Reisner89e58e72011-01-19 13:12:45 +01002447 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002448 case ASB_DISCARD_YOUNGER_PRI:
2449 case ASB_DISCARD_OLDER_PRI:
2450 case ASB_DISCARD_LEAST_CHG:
2451 case ASB_DISCARD_LOCAL:
2452 case ASB_DISCARD_REMOTE:
2453 case ASB_CONSENSUS:
2454 case ASB_DISCARD_SECONDARY:
2455 dev_err(DEV, "Configuration error.\n");
2456 break;
2457 case ASB_VIOLENTLY:
2458 rv = drbd_asb_recover_0p(mdev);
2459 break;
2460 case ASB_DISCONNECT:
2461 break;
2462 case ASB_CALL_HELPER:
2463 hg = drbd_asb_recover_0p(mdev);
2464 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002465 enum drbd_state_rv rv2;
2466
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2468 * we might be here in C_WF_REPORT_PARAMS which is transient.
2469 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002470 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2471 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002472 drbd_khelper(mdev, "pri-lost-after-sb");
2473 } else {
2474 dev_warn(DEV, "Successfully gave up primary role.\n");
2475 rv = hg;
2476 }
2477 } else
2478 rv = hg;
2479 }
2480
2481 return rv;
2482}
2483
2484static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2485 u64 bits, u64 flags)
2486{
2487 if (!uuid) {
2488 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2489 return;
2490 }
2491 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2492 text,
2493 (unsigned long long)uuid[UI_CURRENT],
2494 (unsigned long long)uuid[UI_BITMAP],
2495 (unsigned long long)uuid[UI_HISTORY_START],
2496 (unsigned long long)uuid[UI_HISTORY_END],
2497 (unsigned long long)bits,
2498 (unsigned long long)flags);
2499}
2500
2501/*
2502 100 after split brain try auto recover
2503 2 C_SYNC_SOURCE set BitMap
2504 1 C_SYNC_SOURCE use BitMap
2505 0 no Sync
2506 -1 C_SYNC_TARGET use BitMap
2507 -2 C_SYNC_TARGET set BitMap
2508 -100 after split brain, disconnect
2509-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002510-1091 requires proto 91
2511-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002512 */
2513static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2514{
2515 u64 self, peer;
2516 int i, j;
2517
2518 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2519 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2520
2521 *rule_nr = 10;
2522 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2523 return 0;
2524
2525 *rule_nr = 20;
2526 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2527 peer != UUID_JUST_CREATED)
2528 return -2;
2529
2530 *rule_nr = 30;
2531 if (self != UUID_JUST_CREATED &&
2532 (peer == UUID_JUST_CREATED || peer == (u64)0))
2533 return 2;
2534
2535 if (self == peer) {
2536 int rct, dc; /* roles at crash time */
2537
2538 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2539
Philipp Reisner31890f42011-01-19 14:12:51 +01002540 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002541 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002542
2543 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2544 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2545 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2546 drbd_uuid_set_bm(mdev, 0UL);
2547
2548 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2549 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2550 *rule_nr = 34;
2551 } else {
2552 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2553 *rule_nr = 36;
2554 }
2555
2556 return 1;
2557 }
2558
2559 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2560
Philipp Reisner31890f42011-01-19 14:12:51 +01002561 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002562 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563
2564 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2565 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2566 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2567
2568 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2569 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2570 mdev->p_uuid[UI_BITMAP] = 0UL;
2571
2572 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2573 *rule_nr = 35;
2574 } else {
2575 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2576 *rule_nr = 37;
2577 }
2578
2579 return -1;
2580 }
2581
2582 /* Common power [off|failure] */
2583 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2584 (mdev->p_uuid[UI_FLAGS] & 2);
2585 /* lowest bit is set when we were primary,
2586 * next bit (weight 2) is set when peer was primary */
2587 *rule_nr = 40;
2588
2589 switch (rct) {
2590 case 0: /* !self_pri && !peer_pri */ return 0;
2591 case 1: /* self_pri && !peer_pri */ return 1;
2592 case 2: /* !self_pri && peer_pri */ return -1;
2593 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002594 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002595 return dc ? -1 : 1;
2596 }
2597 }
2598
2599 *rule_nr = 50;
2600 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2601 if (self == peer)
2602 return -1;
2603
2604 *rule_nr = 51;
2605 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2606 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002607 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002608 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2609 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2610 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002611 /* The last P_SYNC_UUID did not get though. Undo the last start of
2612 resync as sync source modifications of the peer's UUIDs. */
2613
Philipp Reisner31890f42011-01-19 14:12:51 +01002614 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002615 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002616
2617 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2618 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002619
2620 dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
2621 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2622
Philipp Reisnerb411b362009-09-25 16:07:19 -07002623 return -1;
2624 }
2625 }
2626
2627 *rule_nr = 60;
2628 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2629 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2630 peer = mdev->p_uuid[i] & ~((u64)1);
2631 if (self == peer)
2632 return -2;
2633 }
2634
2635 *rule_nr = 70;
2636 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2637 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2638 if (self == peer)
2639 return 1;
2640
2641 *rule_nr = 71;
2642 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2643 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002644 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002645 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2646 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2647 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648 /* The last P_SYNC_UUID did not get though. Undo the last start of
2649 resync as sync source modifications of our UUIDs. */
2650
Philipp Reisner31890f42011-01-19 14:12:51 +01002651 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002652 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653
2654 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2655 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2656
Philipp Reisner4a23f262011-01-11 17:42:17 +01002657 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002658 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2659 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2660
2661 return 1;
2662 }
2663 }
2664
2665
2666 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002667 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002668 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2669 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2670 if (self == peer)
2671 return 2;
2672 }
2673
2674 *rule_nr = 90;
2675 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2676 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2677 if (self == peer && self != ((u64)0))
2678 return 100;
2679
2680 *rule_nr = 100;
2681 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2682 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2683 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2684 peer = mdev->p_uuid[j] & ~((u64)1);
2685 if (self == peer)
2686 return -100;
2687 }
2688 }
2689
2690 return -1000;
2691}
2692
2693/* drbd_sync_handshake() returns the new conn state on success, or
2694 CONN_MASK (-1) on failure.
2695 */
2696static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2697 enum drbd_disk_state peer_disk) __must_hold(local)
2698{
2699 int hg, rule_nr;
2700 enum drbd_conns rv = C_MASK;
2701 enum drbd_disk_state mydisk;
2702
2703 mydisk = mdev->state.disk;
2704 if (mydisk == D_NEGOTIATING)
2705 mydisk = mdev->new_state_tmp.disk;
2706
2707 dev_info(DEV, "drbd_sync_handshake:\n");
2708 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2709 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2710 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2711
2712 hg = drbd_uuid_compare(mdev, &rule_nr);
2713
2714 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2715
2716 if (hg == -1000) {
2717 dev_alert(DEV, "Unrelated data, aborting!\n");
2718 return C_MASK;
2719 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002720 if (hg < -1000) {
2721 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002722 return C_MASK;
2723 }
2724
2725 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2726 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2727 int f = (hg == -100) || abs(hg) == 2;
2728 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2729 if (f)
2730 hg = hg*2;
2731 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2732 hg > 0 ? "source" : "target");
2733 }
2734
Adam Gandelman3a11a482010-04-08 16:48:23 -07002735 if (abs(hg) == 100)
2736 drbd_khelper(mdev, "initial-split-brain");
2737
Philipp Reisner89e58e72011-01-19 13:12:45 +01002738 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002739 int pcount = (mdev->state.role == R_PRIMARY)
2740 + (peer_role == R_PRIMARY);
2741 int forced = (hg == -100);
2742
2743 switch (pcount) {
2744 case 0:
2745 hg = drbd_asb_recover_0p(mdev);
2746 break;
2747 case 1:
2748 hg = drbd_asb_recover_1p(mdev);
2749 break;
2750 case 2:
2751 hg = drbd_asb_recover_2p(mdev);
2752 break;
2753 }
2754 if (abs(hg) < 100) {
2755 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2756 "automatically solved. Sync from %s node\n",
2757 pcount, (hg < 0) ? "peer" : "this");
2758 if (forced) {
2759 dev_warn(DEV, "Doing a full sync, since"
2760 " UUIDs where ambiguous.\n");
2761 hg = hg*2;
2762 }
2763 }
2764 }
2765
2766 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002767 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002768 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002769 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770 hg = 1;
2771
2772 if (abs(hg) < 100)
2773 dev_warn(DEV, "Split-Brain detected, manually solved. "
2774 "Sync from %s node\n",
2775 (hg < 0) ? "peer" : "this");
2776 }
2777
2778 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002779 /* FIXME this log message is not correct if we end up here
2780 * after an attempted attach on a diskless node.
2781 * We just refuse to attach -- well, we drop the "connection"
2782 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002783 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002784 drbd_khelper(mdev, "split-brain");
2785 return C_MASK;
2786 }
2787
2788 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2789 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2790 return C_MASK;
2791 }
2792
2793 if (hg < 0 && /* by intention we do not use mydisk here. */
2794 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002795 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796 case ASB_CALL_HELPER:
2797 drbd_khelper(mdev, "pri-lost");
2798 /* fall through */
2799 case ASB_DISCONNECT:
2800 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2801 return C_MASK;
2802 case ASB_VIOLENTLY:
2803 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2804 "assumption\n");
2805 }
2806 }
2807
Philipp Reisner8169e412011-03-15 18:40:27 +01002808 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002809 if (hg == 0)
2810 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2811 else
2812 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2813 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2814 abs(hg) >= 2 ? "full" : "bit-map based");
2815 return C_MASK;
2816 }
2817
Philipp Reisnerb411b362009-09-25 16:07:19 -07002818 if (abs(hg) >= 2) {
2819 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002820 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2821 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002822 return C_MASK;
2823 }
2824
2825 if (hg > 0) { /* become sync source. */
2826 rv = C_WF_BITMAP_S;
2827 } else if (hg < 0) { /* become sync target */
2828 rv = C_WF_BITMAP_T;
2829 } else {
2830 rv = C_CONNECTED;
2831 if (drbd_bm_total_weight(mdev)) {
2832 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2833 drbd_bm_total_weight(mdev));
2834 }
2835 }
2836
2837 return rv;
2838}
2839
2840/* returns 1 if invalid */
2841static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2842{
2843 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2844 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2845 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2846 return 0;
2847
2848 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2849 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2850 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2851 return 1;
2852
2853 /* everything else is valid if they are equal on both sides. */
2854 if (peer == self)
2855 return 0;
2856
2857 /* everything es is invalid. */
2858 return 1;
2859}
2860
Philipp Reisner72046242011-03-15 18:51:47 +01002861static int receive_protocol(struct drbd_tconn *tconn, enum drbd_packet cmd,
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002862 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863{
Philipp Reisner72046242011-03-15 18:51:47 +01002864 struct p_protocol *p = &tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002866 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002867 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2868
Philipp Reisnerb411b362009-09-25 16:07:19 -07002869 p_proto = be32_to_cpu(p->protocol);
2870 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2871 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2872 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002874 cf = be32_to_cpu(p->conn_flags);
2875 p_want_lose = cf & CF_WANT_LOSE;
2876
Philipp Reisner72046242011-03-15 18:51:47 +01002877 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002878
2879 if (cf & CF_DRY_RUN)
Philipp Reisner72046242011-03-15 18:51:47 +01002880 set_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002881
Philipp Reisner72046242011-03-15 18:51:47 +01002882 if (p_proto != tconn->net_conf->wire_protocol) {
2883 conn_err(tconn, "incompatible communication protocols\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 goto disconnect;
2885 }
2886
Philipp Reisner72046242011-03-15 18:51:47 +01002887 if (cmp_after_sb(p_after_sb_0p, tconn->net_conf->after_sb_0p)) {
2888 conn_err(tconn, "incompatible after-sb-0pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889 goto disconnect;
2890 }
2891
Philipp Reisner72046242011-03-15 18:51:47 +01002892 if (cmp_after_sb(p_after_sb_1p, tconn->net_conf->after_sb_1p)) {
2893 conn_err(tconn, "incompatible after-sb-1pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894 goto disconnect;
2895 }
2896
Philipp Reisner72046242011-03-15 18:51:47 +01002897 if (cmp_after_sb(p_after_sb_2p, tconn->net_conf->after_sb_2p)) {
2898 conn_err(tconn, "incompatible after-sb-2pri settings\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899 goto disconnect;
2900 }
2901
Philipp Reisner72046242011-03-15 18:51:47 +01002902 if (p_want_lose && tconn->net_conf->want_lose) {
2903 conn_err(tconn, "both sides have the 'want_lose' flag set\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002904 goto disconnect;
2905 }
2906
Philipp Reisner72046242011-03-15 18:51:47 +01002907 if (p_two_primaries != tconn->net_conf->two_primaries) {
2908 conn_err(tconn, "incompatible setting of the two-primaries options\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002909 goto disconnect;
2910 }
2911
Philipp Reisner72046242011-03-15 18:51:47 +01002912 if (tconn->agreed_pro_version >= 87) {
2913 unsigned char *my_alg = tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914
Philipp Reisner72046242011-03-15 18:51:47 +01002915 if (drbd_recv(tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002916 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002917
2918 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2919 if (strcmp(p_integrity_alg, my_alg)) {
Philipp Reisner72046242011-03-15 18:51:47 +01002920 conn_err(tconn, "incompatible setting of the data-integrity-alg\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002921 goto disconnect;
2922 }
Philipp Reisner72046242011-03-15 18:51:47 +01002923 conn_info(tconn, "data-integrity-alg: %s\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002924 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2925 }
2926
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002927 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002928
2929disconnect:
Philipp Reisner72046242011-03-15 18:51:47 +01002930 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002931 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932}
2933
2934/* helper function
2935 * input: alg name, feature name
2936 * return: NULL (alg name was "")
2937 * ERR_PTR(error) if something goes wrong
2938 * or the crypto hash ptr, if it worked out ok. */
2939struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2940 const char *alg, const char *name)
2941{
2942 struct crypto_hash *tfm;
2943
2944 if (!alg[0])
2945 return NULL;
2946
2947 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2948 if (IS_ERR(tfm)) {
2949 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2950 alg, name, PTR_ERR(tfm));
2951 return tfm;
2952 }
2953 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2954 crypto_free_hash(tfm);
2955 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2956 return ERR_PTR(-EINVAL);
2957 }
2958 return tfm;
2959}
2960
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002961static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2962 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002963{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002964 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002965 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002966 unsigned int header_size, data_size, exp_max_sz;
2967 struct crypto_hash *verify_tfm = NULL;
2968 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002969 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002970 int *rs_plan_s = NULL;
2971 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002972
2973 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2974 : apv == 88 ? sizeof(struct p_rs_param)
2975 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002976 : apv <= 94 ? sizeof(struct p_rs_param_89)
2977 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002978
Philipp Reisner02918be2010-08-20 14:35:10 +02002979 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002980 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002981 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002982 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002983 }
2984
2985 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002986 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002987 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002988 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002989 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002990 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002992 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002993 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002994 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995 D_ASSERT(data_size == 0);
2996 }
2997
2998 /* initialize verify_alg and csums_alg */
2999 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3000
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003001 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003002 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003003
3004 mdev->sync_conf.rate = be32_to_cpu(p->rate);
3005
3006 if (apv >= 88) {
3007 if (apv == 88) {
3008 if (data_size > SHARED_SECRET_MAX) {
3009 dev_err(DEV, "verify-alg too long, "
3010 "peer wants %u, accepting only %u byte\n",
3011 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003012 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003013 }
3014
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003015 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003016 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017
3018 /* we expect NUL terminated string */
3019 /* but just in case someone tries to be evil */
3020 D_ASSERT(p->verify_alg[data_size-1] == 0);
3021 p->verify_alg[data_size-1] = 0;
3022
3023 } else /* apv >= 89 */ {
3024 /* we still expect NUL terminated strings */
3025 /* but just in case someone tries to be evil */
3026 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3027 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3028 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3029 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3030 }
3031
3032 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
3033 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3034 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
3035 mdev->sync_conf.verify_alg, p->verify_alg);
3036 goto disconnect;
3037 }
3038 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3039 p->verify_alg, "verify-alg");
3040 if (IS_ERR(verify_tfm)) {
3041 verify_tfm = NULL;
3042 goto disconnect;
3043 }
3044 }
3045
3046 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
3047 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3048 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
3049 mdev->sync_conf.csums_alg, p->csums_alg);
3050 goto disconnect;
3051 }
3052 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3053 p->csums_alg, "csums-alg");
3054 if (IS_ERR(csums_tfm)) {
3055 csums_tfm = NULL;
3056 goto disconnect;
3057 }
3058 }
3059
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003060 if (apv > 94) {
3061 mdev->sync_conf.rate = be32_to_cpu(p->rate);
3062 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3063 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
3064 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
3065 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003066
3067 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
3068 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
3069 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
3070 if (!rs_plan_s) {
3071 dev_err(DEV, "kmalloc of fifo_buffer failed");
3072 goto disconnect;
3073 }
3074 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003075 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003076
3077 spin_lock(&mdev->peer_seq_lock);
3078 /* lock against drbd_nl_syncer_conf() */
3079 if (verify_tfm) {
3080 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
3081 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
3082 crypto_free_hash(mdev->verify_tfm);
3083 mdev->verify_tfm = verify_tfm;
3084 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3085 }
3086 if (csums_tfm) {
3087 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
3088 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
3089 crypto_free_hash(mdev->csums_tfm);
3090 mdev->csums_tfm = csums_tfm;
3091 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3092 }
Philipp Reisner778f2712010-07-06 11:14:00 +02003093 if (fifo_size != mdev->rs_plan_s.size) {
3094 kfree(mdev->rs_plan_s.values);
3095 mdev->rs_plan_s.values = rs_plan_s;
3096 mdev->rs_plan_s.size = fifo_size;
3097 mdev->rs_planed = 0;
3098 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003099 spin_unlock(&mdev->peer_seq_lock);
3100 }
3101
3102 return ok;
3103disconnect:
3104 /* just for completeness: actually not needed,
3105 * as this is not reached if csums_tfm was ok. */
3106 crypto_free_hash(csums_tfm);
3107 /* but free the verify_tfm again, if csums_tfm did not work out */
3108 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003109 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003110 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003111}
3112
Philipp Reisnerb411b362009-09-25 16:07:19 -07003113/* warn if the arguments differ by more than 12.5% */
3114static void warn_if_differ_considerably(struct drbd_conf *mdev,
3115 const char *s, sector_t a, sector_t b)
3116{
3117 sector_t d;
3118 if (a == 0 || b == 0)
3119 return;
3120 d = (a > b) ? (a - b) : (b - a);
3121 if (d > (a>>3) || d > (b>>3))
3122 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3123 (unsigned long long)a, (unsigned long long)b);
3124}
3125
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003126static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3127 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003128{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003129 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003130 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131 sector_t p_size, p_usize, my_usize;
3132 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003133 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003134
Philipp Reisnerb411b362009-09-25 16:07:19 -07003135 p_size = be64_to_cpu(p->d_size);
3136 p_usize = be64_to_cpu(p->u_size);
3137
Philipp Reisnerb411b362009-09-25 16:07:19 -07003138 /* just store the peer's disk size for now.
3139 * we still need to figure out whether we accept that. */
3140 mdev->p_size = p_size;
3141
Philipp Reisnerb411b362009-09-25 16:07:19 -07003142 if (get_ldev(mdev)) {
3143 warn_if_differ_considerably(mdev, "lower level device sizes",
3144 p_size, drbd_get_max_capacity(mdev->ldev));
3145 warn_if_differ_considerably(mdev, "user requested size",
3146 p_usize, mdev->ldev->dc.disk_size);
3147
3148 /* if this is the first connect, or an otherwise expected
3149 * param exchange, choose the minimum */
3150 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3151 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3152 p_usize);
3153
3154 my_usize = mdev->ldev->dc.disk_size;
3155
3156 if (mdev->ldev->dc.disk_size != p_usize) {
3157 mdev->ldev->dc.disk_size = p_usize;
3158 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3159 (unsigned long)mdev->ldev->dc.disk_size);
3160 }
3161
3162 /* Never shrink a device with usable data during connect.
3163 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003164 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165 drbd_get_capacity(mdev->this_bdev) &&
3166 mdev->state.disk >= D_OUTDATED &&
3167 mdev->state.conn < C_CONNECTED) {
3168 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003169 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003170 mdev->ldev->dc.disk_size = my_usize;
3171 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003172 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173 }
3174 put_ldev(mdev);
3175 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176
Philipp Reisnere89b5912010-03-24 17:11:33 +01003177 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003178 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003179 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003180 put_ldev(mdev);
3181 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003182 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003183 drbd_md_sync(mdev);
3184 } else {
3185 /* I am diskless, need to accept the peer's size. */
3186 drbd_set_my_capacity(mdev, p_size);
3187 }
3188
Philipp Reisner99432fc2011-05-20 16:39:13 +02003189 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3190 drbd_reconsider_max_bio_size(mdev);
3191
Philipp Reisnerb411b362009-09-25 16:07:19 -07003192 if (get_ldev(mdev)) {
3193 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3194 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3195 ldsc = 1;
3196 }
3197
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198 put_ldev(mdev);
3199 }
3200
3201 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3202 if (be64_to_cpu(p->c_size) !=
3203 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3204 /* we have different sizes, probably peer
3205 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003206 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003207 }
3208 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3209 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3210 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003211 mdev->state.disk >= D_INCONSISTENT) {
3212 if (ddsf & DDSF_NO_RESYNC)
3213 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3214 else
3215 resync_after_online_grow(mdev);
3216 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3218 }
3219 }
3220
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003221 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222}
3223
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003224static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3225 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003227 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003229 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230
Philipp Reisnerb411b362009-09-25 16:07:19 -07003231 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3232
3233 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3234 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3235
3236 kfree(mdev->p_uuid);
3237 mdev->p_uuid = p_uuid;
3238
3239 if (mdev->state.conn < C_CONNECTED &&
3240 mdev->state.disk < D_INCONSISTENT &&
3241 mdev->state.role == R_PRIMARY &&
3242 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3243 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3244 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003245 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003246 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003247 }
3248
3249 if (get_ldev(mdev)) {
3250 int skip_initial_sync =
3251 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003252 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3254 (p_uuid[UI_FLAGS] & 8);
3255 if (skip_initial_sync) {
3256 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3257 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003258 "clear_n_write from receive_uuids",
3259 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3261 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3262 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3263 CS_VERBOSE, NULL);
3264 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003265 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003266 }
3267 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003268 } else if (mdev->state.disk < D_INCONSISTENT &&
3269 mdev->state.role == R_PRIMARY) {
3270 /* I am a diskless primary, the peer just created a new current UUID
3271 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003272 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003273 }
3274
3275 /* Before we test for the disk state, we should wait until an eventually
3276 ongoing cluster wide state change is finished. That is important if
3277 we are primary and are detaching from our disk. We need to see the
3278 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003279 mutex_lock(mdev->state_mutex);
3280 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003282 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3283
3284 if (updated_uuids)
3285 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003286
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003287 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288}
3289
3290/**
3291 * convert_state() - Converts the peer's view of the cluster state to our point of view
3292 * @ps: The state as seen by the peer.
3293 */
3294static union drbd_state convert_state(union drbd_state ps)
3295{
3296 union drbd_state ms;
3297
3298 static enum drbd_conns c_tab[] = {
3299 [C_CONNECTED] = C_CONNECTED,
3300
3301 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3302 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3303 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3304 [C_VERIFY_S] = C_VERIFY_T,
3305 [C_MASK] = C_MASK,
3306 };
3307
3308 ms.i = ps.i;
3309
3310 ms.conn = c_tab[ps.conn];
3311 ms.peer = ps.role;
3312 ms.role = ps.peer;
3313 ms.pdsk = ps.disk;
3314 ms.disk = ps.pdsk;
3315 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3316
3317 return ms;
3318}
3319
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003320static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3321 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003323 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003324 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003325 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326
Philipp Reisnerb411b362009-09-25 16:07:19 -07003327 mask.i = be32_to_cpu(p->mask);
3328 val.i = be32_to_cpu(p->val);
3329
Philipp Reisner25703f82011-02-07 14:35:25 +01003330 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003331 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003332 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003333 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003334 }
3335
3336 mask = convert_state(mask);
3337 val = convert_state(val);
3338
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003339 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3340 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 drbd_md_sync(mdev);
3343
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003344 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003345}
3346
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003347static int receive_req_conn_state(struct drbd_tconn *tconn, enum drbd_packet cmd,
3348 unsigned int data_size)
3349{
3350 struct p_req_state *p = &tconn->data.rbuf.req_state;
3351 union drbd_state mask, val;
3352 enum drbd_state_rv rv;
3353
3354 mask.i = be32_to_cpu(p->mask);
3355 val.i = be32_to_cpu(p->val);
3356
3357 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3358 mutex_is_locked(&tconn->cstate_mutex)) {
3359 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
3360 return true;
3361 }
3362
3363 mask = convert_state(mask);
3364 val = convert_state(val);
3365
3366 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
3367 conn_send_sr_reply(tconn, rv);
3368
3369 return true;
3370}
3371
/*
 * receive_state() - handle a state packet sent by the peer.
 *
 * Decodes the peer's state from the receive buffer, decides whether a
 * resync handshake has to be considered (drbd_sync_handshake()), and applies
 * the resulting connection state together with the peer's role, disk state
 * and sync-pause flags via _drbd_set_state().
 *
 * Returns true if the packet was handled, false otherwise.  Apart from the
 * dry-run case, the failure paths also request C_DISCONNECTING or
 * C_PROTOCOL_ERROR on the connection before returning.
 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003372static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3373 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003374{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003375 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003376 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003377 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003378 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003379 int rv;
3380
Philipp Reisnerb411b362009-09-25 16:07:19 -07003381 peer_state.i = be32_to_cpu(p->state);
3382
3383 real_peer_disk = peer_state.disk;
/* NOTE(review): mdev->p_uuid is dereferenced below without a NULL check,
 * while the resync decision further down tests mdev->p_uuid before using
 * it - confirm p_uuid is always set when the peer reports D_NEGOTIATING. */
3384 if (peer_state.disk == D_NEGOTIATING) {
3385 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3386 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3387 }
3388
/* Take a snapshot of the current state under req_lock.  The snapshot is
 * compared against mdev->state again further down; if the state changed in
 * the meantime, evaluation restarts at the retry label. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003389 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003391 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003392 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003393
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003394 /* peer says his disk is uptodate, while we think it is inconsistent,
3395 * and this happens while we think we have a sync going on. */
3396 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3397 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3398 /* If we are (becoming) SyncSource, but peer is still in sync
3399 * preparation, ignore its uptodate-ness to avoid flapping, it
3400 * will change to inconsistent once the peer reaches active
3401 * syncing states.
3402 * It may have changed syncer-paused flags, however, so we
3403 * cannot ignore this completely. */
3404 if (peer_state.conn > C_CONNECTED &&
3405 peer_state.conn < C_SYNC_SOURCE)
3406 real_peer_disk = D_INCONSISTENT;
3407
3408 /* if peer_state changes to connected at the same time,
3409 * it explicitly notifies us that it finished resync.
3410 * Maybe we should finish it up, too? */
3411 else if (os.conn >= C_SYNC_SOURCE &&
3412 peer_state.conn == C_CONNECTED) {
3413 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3414 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003415 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003416 }
3417 }
3418
3419 /* peer says his disk is inconsistent, while we think it is uptodate,
3420 * and this happens while the peer still thinks we have a sync going on,
3421 * but we think we are already done with the sync.
3422 * We ignore this to avoid flapping pdsk.
3423 * This should not happen, if the peer is a recent version of drbd. */
3424 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3425 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3426 real_peer_disk = D_UP_TO_DATE;
3427
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003428 if (ns.conn == C_WF_REPORT_PARAMS)
3429 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003430
Philipp Reisner67531712010-10-27 12:21:30 +02003431 if (peer_state.conn == C_AHEAD)
3432 ns.conn = C_BEHIND;
3433
Philipp Reisnerb411b362009-09-25 16:07:19 -07003434 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3435 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3436 int cr; /* consider resync */
3437
3438 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003439 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003440 /* if we had an established connection
3441 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003442 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003443 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003444 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003445 /* if we have both been inconsistent, and the peer has been
3446 * forced to be UpToDate with --overwrite-data */
3447 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3448 /* if we had been plain connected, and the admin requested to
3449 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003450 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003451 (peer_state.conn >= C_STARTING_SYNC_S &&
3452 peer_state.conn <= C_WF_BITMAP_T));
3453
3454 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003455 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456
3457 put_ldev(mdev);
/* C_MASK here presumably means drbd_sync_handshake() did not select a sync
 * state (it returns C_MASK e.g. for a dry-run connect or when writing the
 * bitmap failed).  Fall back to C_CONNECTED and sort out why. */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003458 if (ns.conn == C_MASK) {
3459 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003461 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003462 } else if (peer_state.disk == D_NEGOTIATING) {
3463 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3464 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003465 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003466 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003467 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003468 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003469 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003470 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003471 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003472 }
3473 }
3474 }
3475
/* Re-take the lock.  If mdev->state no longer matches the snapshot, jump
 * back to retry with req_lock still held (the label sits after the lock
 * acquisition) and redo the evaluation with a fresh snapshot. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003476 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003477 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478 goto retry;
3479 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480 ns.peer = peer_state.role;
3481 ns.pdsk = real_peer_disk;
3482 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003483 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003484 ns.disk = mdev->new_state_tmp.disk;
/* CS_HARD is added unless this is the transition from a not yet connected
 * state to C_CONNECTED or beyond. */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003485 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3486 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003487 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003488 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003489 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003490 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003491 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003492 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003493 drbd_uuid_new_current(mdev);
3494 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003495 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003496 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003497 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003498 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003499 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003500 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003501
3502 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003503 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003504 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003505 }
3506
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003507 if (os.conn > C_WF_REPORT_PARAMS) {
3508 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003509 peer_state.disk != D_NEGOTIATING ) {
3510 /* we want resync, peer has not yet decided to sync... */
3511 /* Nowadays only used when forcing a node into primary role and
3512 setting its disk to UpToDate with that */
3513 drbd_send_uuids(mdev);
3514 drbd_send_state(mdev);
3515 }
3516 }
3517
Philipp Reisner89e58e72011-01-19 13:12:45 +01003518 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003519
3520 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3521
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003522 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003523}
3524
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003525static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3526 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003528 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529
3530 wait_event(mdev->misc_wait,
3531 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003532 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003533 mdev->state.conn < C_CONNECTED ||
3534 mdev->state.disk < D_NEGOTIATING);
3535
3536 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3537
Philipp Reisnerb411b362009-09-25 16:07:19 -07003538 /* Here the _drbd_uuid_ functions are right, current should
3539 _not_ be rotated into the history */
3540 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3541 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3542 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3543
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003544 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003545 drbd_start_resync(mdev, C_SYNC_TARGET);
3546
3547 put_ldev(mdev);
3548 } else
3549 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3550
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003551 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003552}
3553
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003554/**
3555 * receive_bitmap_plain
3556 *
3557 * Return 0 when done, 1 when another iteration is needed, and a negative error
3558 * code upon failure.
3559 */
3560static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003561receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3562 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003563{
3564 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3565 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003566 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003567
Philipp Reisner02918be2010-08-20 14:35:10 +02003568 if (want != data_size) {
3569 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003570 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003571 }
3572 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003573 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003574 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003575 if (err != want) {
3576 if (err >= 0)
3577 err = -EIO;
3578 return err;
3579 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003580
3581 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3582
3583 c->word_offset += num_words;
3584 c->bit_offset = c->word_offset * BITS_PER_LONG;
3585 if (c->bit_offset > c->bm_bits)
3586 c->bit_offset = c->bm_bits;
3587
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003588 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003589}
3590
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003591/**
3592 * recv_bm_rle_bits
3593 *
3594 * Return 0 when done, 1 when another iteration is needed, and a negative error
3595 * code upon failure.
3596 */
3597static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598recv_bm_rle_bits(struct drbd_conf *mdev,
3599 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003600 struct bm_xfer_ctx *c,
3601 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003602{
3603 struct bitstream bs;
3604 u64 look_ahead;
3605 u64 rl;
3606 u64 tmp;
3607 unsigned long s = c->bit_offset;
3608 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003609 int toggle = DCBP_get_start(p);
3610 int have;
3611 int bits;
3612
3613 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3614
3615 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3616 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003617 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003618
3619 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3620 bits = vli_decode_bits(&rl, look_ahead);
3621 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003622 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003623
3624 if (toggle) {
3625 e = s + rl -1;
3626 if (e >= c->bm_bits) {
3627 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003628 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629 }
3630 _drbd_bm_set_bits(mdev, s, e);
3631 }
3632
3633 if (have < bits) {
3634 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3635 have, bits, look_ahead,
3636 (unsigned int)(bs.cur.b - p->code),
3637 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003638 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003639 }
3640 look_ahead >>= bits;
3641 have -= bits;
3642
3643 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3644 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003645 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003646 look_ahead |= tmp << have;
3647 have += bits;
3648 }
3649
3650 c->bit_offset = s;
3651 bm_xfer_ctx_bit_to_word_offset(c);
3652
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003653 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654}
3655
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003656/**
3657 * decode_bitmap_c
3658 *
3659 * Return 0 when done, 1 when another iteration is needed, and a negative error
3660 * code upon failure.
3661 */
3662static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663decode_bitmap_c(struct drbd_conf *mdev,
3664 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003665 struct bm_xfer_ctx *c,
3666 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667{
3668 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003669 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003670
3671 /* other variants had been implemented for evaluation,
3672 * but have been dropped as this one turned out to be "best"
3673 * during all our tests. */
3674
3675 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003676 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003677 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003678}
3679
3680void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3681 const char *direction, struct bm_xfer_ctx *c)
3682{
3683 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003684 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003685 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3686 + c->bm_words * sizeof(long);
3687 unsigned total = c->bytes[0] + c->bytes[1];
3688 unsigned r;
3689
3690 /* total can not be zero. but just in case: */
3691 if (total == 0)
3692 return;
3693
3694 /* don't report if not compressed */
3695 if (total >= plain)
3696 return;
3697
3698 /* total < plain. check for overflow, still */
3699 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3700 : (1000 * total / plain);
3701
3702 if (r > 1000)
3703 r = 1000;
3704
3705 r = 1000 - r;
3706 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3707 "total %u; compression: %u.%u%%\n",
3708 direction,
3709 c->bytes[1], c->packets[1],
3710 c->bytes[0], c->packets[0],
3711 total, r/10, r % 10);
3712}
3713
3714/* Since we are processing the bitfield from lower addresses to higher,
3715 it does not matter if the process it in 32 bit chunks or 64 bit
3716 chunks as long as it is little endian. (Understand it as byte stream,
3717 beginning with the lowest byte...) If we would use big endian
3718 we would need to process it from the highest address to the lowest,
3719 in order to be agnostic to the 32 vs 64 bits issue.
3720
3721 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003722static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3723 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003724{
3725 struct bm_xfer_ctx c;
3726 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003727 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003728 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003729 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003730 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003732 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3733 /* you are supposed to send additional out-of-sync information
3734 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003735
3736 /* maybe we should use some per thread scratch page,
3737 * and allocate that during initial device creation? */
3738 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3739 if (!buffer) {
3740 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3741 goto out;
3742 }
3743
3744 c = (struct bm_xfer_ctx) {
3745 .bm_bits = drbd_bm_bits(mdev),
3746 .bm_words = drbd_bm_words(mdev),
3747 };
3748
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003749 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003750 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003751 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003752 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003753 /* MAYBE: sanity check that we speak proto >= 90,
3754 * and the feature is enabled! */
3755 struct p_compressed_bm *p;
3756
Philipp Reisner02918be2010-08-20 14:35:10 +02003757 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003758 dev_err(DEV, "ReportCBitmap packet too large\n");
3759 goto out;
3760 }
3761 /* use the page buff */
3762 p = buffer;
3763 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003764 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003765 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003766 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3767 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003768 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003769 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003770 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003771 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003772 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773 goto out;
3774 }
3775
Philipp Reisner02918be2010-08-20 14:35:10 +02003776 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003777 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003778
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003779 if (err <= 0) {
3780 if (err < 0)
3781 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003782 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003783 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003784 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003785 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003786 cmd = pi.cmd;
3787 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003788 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003789
3790 INFO_bm_xfer_stats(mdev, "receive", &c);
3791
3792 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003793 enum drbd_state_rv rv;
3794
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795 ok = !drbd_send_bitmap(mdev);
3796 if (!ok)
3797 goto out;
3798 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003799 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3800 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003801 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3802 /* admin may have requested C_DISCONNECTING,
3803 * other threads may have noticed network errors */
3804 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3805 drbd_conn_str(mdev->state.conn));
3806 }
3807
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003808 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003809 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003810 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3812 drbd_start_resync(mdev, C_SYNC_SOURCE);
3813 free_page((unsigned long) buffer);
3814 return ok;
3815}
3816
Philipp Reisner2de876e2011-03-15 14:38:01 +01003817static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818{
3819 /* TODO zero copy sink :) */
3820 static char sink[128];
3821 int size, want, r;
3822
Philipp Reisner02918be2010-08-20 14:35:10 +02003823 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003824 while (size > 0) {
3825 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003826 r = drbd_recv(tconn, sink, want);
3827 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003828 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829 size -= r;
3830 }
3831 return size == 0;
3832}
3833
Philipp Reisner2de876e2011-03-15 14:38:01 +01003834static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3835 unsigned int data_size)
3836{
3837 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3838 cmd, data_size);
3839
3840 return _tconn_receive_skip(mdev->tconn, data_size);
3841}
3842
3843static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3844{
3845 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3846 cmd, data_size);
3847
3848 return _tconn_receive_skip(tconn, data_size);
3849}
3850
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003851static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3852 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003853{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003854 /* Make sure we've acked all the TCP data associated
3855 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003856 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003857
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003858 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003859}
3860
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003861static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3862 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003863{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003864 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003865
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003866 switch (mdev->state.conn) {
3867 case C_WF_SYNC_UUID:
3868 case C_WF_BITMAP_T:
3869 case C_BEHIND:
3870 break;
3871 default:
3872 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3873 drbd_conn_str(mdev->state.conn));
3874 }
3875
Philipp Reisner73a01a12010-10-27 14:33:00 +02003876 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3877
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003878 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003879}
3880
Philipp Reisner02918be2010-08-20 14:35:10 +02003881struct data_cmd {
3882 int expect_payload;
3883 size_t pkt_size;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01003884 enum mdev_or_conn fa_type; /* first argument's type */
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003885 union {
3886 int (*mdev_fn)(struct drbd_conf *, enum drbd_packet cmd,
3887 unsigned int to_receive);
3888 int (*conn_fn)(struct drbd_tconn *, enum drbd_packet cmd,
3889 unsigned int to_receive);
3890 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07003891};
3892
Philipp Reisner02918be2010-08-20 14:35:10 +02003893static struct data_cmd drbd_cmd_handler[] = {
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003894 [P_DATA] = { 1, sizeof(struct p_data), MDEV, { receive_Data } },
3895 [P_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_DataReply } },
3896 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), MDEV, { receive_RSDataReply } } ,
3897 [P_BARRIER] = { 0, sizeof(struct p_barrier), MDEV, { receive_Barrier } } ,
3898 [P_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3899 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), MDEV, { receive_bitmap } } ,
3900 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), MDEV, { receive_UnplugRemote } },
3901 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3902 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3903 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
3904 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), MDEV, { receive_SyncParam } },
Philipp Reisner72046242011-03-15 18:51:47 +01003905 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), CONN, { .conn_fn = receive_protocol } },
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003906 [P_UUIDS] = { 0, sizeof(struct p_uuids), MDEV, { receive_uuids } },
3907 [P_SIZES] = { 0, sizeof(struct p_sizes), MDEV, { receive_sizes } },
3908 [P_STATE] = { 0, sizeof(struct p_state), MDEV, { receive_state } },
3909 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), MDEV, { receive_req_state } },
3910 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), MDEV, { receive_sync_uuid } },
3911 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3912 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3913 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), MDEV, { receive_DataRequest } },
3914 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), MDEV, { receive_skip } },
3915 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), MDEV, { receive_out_of_sync } },
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003916 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), CONN, { .conn_fn = receive_req_conn_state } },
Philipp Reisner02918be2010-08-20 14:35:10 +02003917};
3918
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually the callback can find the usual p_header in
   mdev->tconn->data.rbuf.header.head, but it must not rely on that,
   since there is also p_header95!
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003925
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003926static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003928 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003929 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003930 size_t shs; /* sub header size */
3931 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003933 while (get_t_state(&tconn->receiver) == RUNNING) {
3934 drbd_thread_current_set_cpu(&tconn->receiver);
3935 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003936 goto err_out;
3937
Andreas Gruenbacher6e849ce2011-03-14 17:27:45 +01003938 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003939 !drbd_cmd_handler[pi.cmd].mdev_fn)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003940 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003941 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003942 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003943
Philipp Reisner77351055b2011-02-07 17:24:26 +01003944 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3945 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003946 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003947 goto err_out;
3948 }
3949
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003950 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003951 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003952 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003953 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003954 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003955 goto err_out;
3956 }
3957 }
3958
Philipp Reisnera4fbda82011-03-16 11:13:17 +01003959 if (drbd_cmd_handler[pi.cmd].fa_type == CONN) {
Philipp Reisnerd9ae84e2011-03-15 18:50:22 +01003960 rv = drbd_cmd_handler[pi.cmd].conn_fn(tconn, pi.cmd, pi.size - shs);
3961 } else {
3962 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
3963 rv = mdev ?
3964 drbd_cmd_handler[pi.cmd].mdev_fn(mdev, pi.cmd, pi.size - shs) :
3965 tconn_receive_skip(tconn, pi.cmd, pi.size - shs);
3966 }
Philipp Reisner02918be2010-08-20 14:35:10 +02003967
3968 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003969 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003970 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003971 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003972 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003973 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003974
Philipp Reisner02918be2010-08-20 14:35:10 +02003975 if (0) {
3976 err_out:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003977 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003978 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979}
3980
Philipp Reisner0e29d162011-02-18 14:23:11 +01003981void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003982{
3983 struct drbd_wq_barrier barr;
3984
3985 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01003986 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003987 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01003988 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989 wait_for_completion(&barr.done);
3990}
3991
Philipp Reisner360cc742011-02-08 14:29:53 +01003992static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003994 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003996
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003997 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003998 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003999
4000 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004001 drbd_thread_stop(&tconn->asender);
4002 drbd_free_sock(tconn);
4003
4004 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
4005
4006 conn_info(tconn, "Connection closed\n");
4007
4008 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004009 oc = tconn->cstate;
4010 if (oc >= C_UNCONNECTED)
4011 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
4012
Philipp Reisner360cc742011-02-08 14:29:53 +01004013 spin_unlock_irq(&tconn->req_lock);
4014
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004015 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01004016 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
4017
4018 crypto_free_hash(tconn->cram_hmac_tfm);
4019 tconn->cram_hmac_tfm = NULL;
4020
4021 kfree(tconn->net_conf);
4022 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004023 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01004024 }
4025}
4026
4027static int drbd_disconnected(int vnr, void *p, void *data)
4028{
4029 struct drbd_conf *mdev = (struct drbd_conf *)p;
4030 enum drbd_fencing_p fp;
4031 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004032
Philipp Reisner85719572010-07-21 10:20:17 +02004033 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004034 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004035 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4036 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4037 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004038 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004039
4040 /* We do not have data structures that would allow us to
4041 * get the rs_pending_cnt down to 0 again.
4042 * * On C_SYNC_TARGET we do not have any data structures describing
4043 * the pending RSDataRequest's we have sent.
4044 * * On C_SYNC_SOURCE there is no data structure that tracks
4045 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4046 * And no, it is not the sum of the reference counts in the
4047 * resync_LRU. The resync_LRU tracks the whole operation including
4048 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4049 * on the fly. */
4050 drbd_rs_cancel_all(mdev);
4051 mdev->rs_total = 0;
4052 mdev->rs_failed = 0;
4053 atomic_set(&mdev->rs_pending_cnt, 0);
4054 wake_up(&mdev->misc_wait);
4055
Philipp Reisner7fde2be2011-03-01 11:08:28 +01004056 del_timer(&mdev->request_timer);
4057
Philipp Reisnerb411b362009-09-25 16:07:19 -07004058 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004059 resync_timer_fn((unsigned long)mdev);
4060
Philipp Reisnerb411b362009-09-25 16:07:19 -07004061 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4062 * w_make_resync_request etc. which may still be on the worker queue
4063 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004064 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065
4066 /* This also does reclaim_net_ee(). If we do this too early, we might
4067 * miss some resync ee and pages.*/
4068 drbd_process_done_ee(mdev);
4069
4070 kfree(mdev->p_uuid);
4071 mdev->p_uuid = NULL;
4072
Philipp Reisnerfb22c402010-09-08 23:20:21 +02004073 if (!is_susp(mdev->state))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004074 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004075
Philipp Reisnerb411b362009-09-25 16:07:19 -07004076 drbd_md_sync(mdev);
4077
4078 fp = FP_DONT_CARE;
4079 if (get_ldev(mdev)) {
4080 fp = mdev->ldev->dc.fencing;
4081 put_ldev(mdev);
4082 }
4083
Philipp Reisner87f7be42010-06-11 13:56:33 +02004084 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
4085 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004087 /* serialize with bitmap writeout triggered by the state change,
4088 * if any. */
4089 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4090
Philipp Reisnerb411b362009-09-25 16:07:19 -07004091 /* tcp_close and release of sendpage pages can be deferred. I don't
4092 * want to use SO_LINGER, because apparently it can be deferred for
4093 * more than 20 seconds (longest time I checked).
4094 *
4095 * Actually we don't care for exactly when the network stack does its
4096 * put_page(), but release our reference on these pages right here.
4097 */
4098 i = drbd_release_ee(mdev, &mdev->net_ee);
4099 if (i)
4100 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004101 i = atomic_read(&mdev->pp_in_use_by_net);
4102 if (i)
4103 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004104 i = atomic_read(&mdev->pp_in_use);
4105 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004106 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004107
4108 D_ASSERT(list_empty(&mdev->read_ee));
4109 D_ASSERT(list_empty(&mdev->active_ee));
4110 D_ASSERT(list_empty(&mdev->sync_ee));
4111 D_ASSERT(list_empty(&mdev->done_ee));
4112
4113 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4114 atomic_set(&mdev->current_epoch->epoch_size, 0);
4115 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01004116
4117 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118}
4119
4120/*
4121 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4122 * we can agree on is stored in agreed_pro_version.
4123 *
4124 * feature flags and the reserved array should be enough room for future
4125 * enhancements of the handshake protocol, and possible plugins...
4126 *
4127 * for now, they are expected to be zero, but ignored.
4128 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004129static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004130{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01004131 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004132 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004133 int ok;
4134
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004135 if (mutex_lock_interruptible(&tconn->data.mutex)) {
4136 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004137 return 0; /* interrupted. not ok. */
4138 }
4139
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004140 if (tconn->data.socket == NULL) {
4141 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004142 return 0;
4143 }
4144
4145 memset(p, 0, sizeof(*p));
4146 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4147 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004148 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
4149 &p->head, sizeof(*p), 0);
4150 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004151 return ok;
4152}
4153
4154/*
4155 * return values:
4156 * 1 yes, we have a valid connection
4157 * 0 oops, did not work out, please try again
4158 * -1 peer talks different language,
4159 * no point in trying again, please go standalone.
4160 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004161static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004163 /* ASSERT current == tconn->receiver ... */
4164 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004165 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004166 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167 int rv;
4168
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004169 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004170 if (!rv)
4171 return 0;
4172
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004173 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004174 if (!rv)
4175 return 0;
4176
Philipp Reisner77351055b2011-02-07 17:24:26 +01004177 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004178 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004179 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180 return -1;
4181 }
4182
Philipp Reisner77351055b2011-02-07 17:24:26 +01004183 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004184 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004185 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004186 return -1;
4187 }
4188
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004189 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004190
4191 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004192 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004193 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004194 return 0;
4195 }
4196
Philipp Reisnerb411b362009-09-25 16:07:19 -07004197 p->protocol_min = be32_to_cpu(p->protocol_min);
4198 p->protocol_max = be32_to_cpu(p->protocol_max);
4199 if (p->protocol_max == 0)
4200 p->protocol_max = p->protocol_min;
4201
4202 if (PRO_VERSION_MAX < p->protocol_min ||
4203 PRO_VERSION_MIN > p->protocol_max)
4204 goto incompat;
4205
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004206 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004207
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004208 conn_info(tconn, "Handshake successful: "
4209 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210
4211 return 1;
4212
4213 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004214 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004215 "I support %d-%d, peer supports %d-%d\n",
4216 PRO_VERSION_MIN, PRO_VERSION_MAX,
4217 p->protocol_min, p->protocol_max);
4218 return -1;
4219}
4220
4221#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Stub used when the kernel provides no HMAC support: authentication can
 * never succeed, so always report "auth failed, don't try again" (-1).
 *
 * Only a connection (tconn) is in scope here, no mdev, so the messages
 * are logged with conn_err() like every other tconn-based function in
 * this file.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4228#else
4229#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004230
4231/* Return value:
4232 1 - auth succeeded,
4233 0 - failed, try again (network error),
4234 -1 - auth failed, don't try again.
4235*/
4236
Philipp Reisner13e60372011-02-08 09:54:40 +01004237static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004238{
4239 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4240 struct scatterlist sg;
4241 char *response = NULL;
4242 char *right_response = NULL;
4243 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004244 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004245 unsigned int resp_size;
4246 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004247 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248 int rv;
4249
Philipp Reisner13e60372011-02-08 09:54:40 +01004250 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004251 desc.flags = 0;
4252
Philipp Reisner13e60372011-02-08 09:54:40 +01004253 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4254 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004255 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004256 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004257 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004258 goto fail;
4259 }
4260
4261 get_random_bytes(my_challenge, CHALLENGE_LEN);
4262
Philipp Reisner13e60372011-02-08 09:54:40 +01004263 rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264 if (!rv)
4265 goto fail;
4266
Philipp Reisner13e60372011-02-08 09:54:40 +01004267 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004268 if (!rv)
4269 goto fail;
4270
Philipp Reisner77351055b2011-02-07 17:24:26 +01004271 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004272 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004273 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274 rv = 0;
4275 goto fail;
4276 }
4277
Philipp Reisner77351055b2011-02-07 17:24:26 +01004278 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004279 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004280 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004281 goto fail;
4282 }
4283
Philipp Reisner77351055b2011-02-07 17:24:26 +01004284 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004286 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004287 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288 goto fail;
4289 }
4290
Philipp Reisner13e60372011-02-08 09:54:40 +01004291 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292
Philipp Reisner77351055b2011-02-07 17:24:26 +01004293 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004294 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004295 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296 rv = 0;
4297 goto fail;
4298 }
4299
Philipp Reisner13e60372011-02-08 09:54:40 +01004300 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301 response = kmalloc(resp_size, GFP_NOIO);
4302 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004303 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004304 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305 goto fail;
4306 }
4307
4308 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004309 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310
4311 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4312 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004313 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004314 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315 goto fail;
4316 }
4317
Philipp Reisner13e60372011-02-08 09:54:40 +01004318 rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319 if (!rv)
4320 goto fail;
4321
Philipp Reisner13e60372011-02-08 09:54:40 +01004322 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323 if (!rv)
4324 goto fail;
4325
Philipp Reisner77351055b2011-02-07 17:24:26 +01004326 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004327 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004328 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329 rv = 0;
4330 goto fail;
4331 }
4332
Philipp Reisner77351055b2011-02-07 17:24:26 +01004333 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004334 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335 rv = 0;
4336 goto fail;
4337 }
4338
Philipp Reisner13e60372011-02-08 09:54:40 +01004339 rv = drbd_recv(tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340
4341 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004342 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004343 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344 rv = 0;
4345 goto fail;
4346 }
4347
4348 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004349 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004350 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004351 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352 goto fail;
4353 }
4354
4355 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4356
4357 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4358 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004359 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004360 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361 goto fail;
4362 }
4363
4364 rv = !memcmp(response, right_response, resp_size);
4365
4366 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004367 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4368 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004369 else
4370 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371
4372 fail:
4373 kfree(peers_ch);
4374 kfree(response);
4375 kfree(right_response);
4376
4377 return rv;
4378}
4379#endif
4380
4381int drbdd_init(struct drbd_thread *thi)
4382{
Philipp Reisner392c8802011-02-09 10:33:31 +01004383 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004384 int h;
4385
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004386 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387
4388 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004389 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004390 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004391 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004392 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004393 }
4394 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004395 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004396 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397 }
4398 } while (h == 0);
4399
4400 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004401 if (get_net_conf(tconn)) {
4402 drbdd(tconn);
4403 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004404 }
4405 }
4406
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004407 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004408
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004409 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004410 return 0;
4411}
4412
4413/* ********* acknowledge sender ******** */
4414
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004415static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004416{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004417 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004418 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419
4420 int retcode = be32_to_cpu(p->retcode);
4421
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004422 if (cmd == P_STATE_CHG_REPLY) {
4423 if (retcode >= SS_SUCCESS) {
4424 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4425 } else {
4426 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4427 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4428 drbd_set_st_err_str(retcode), retcode);
4429 }
4430 wake_up(&mdev->state_wait);
4431 } else /* conn == P_CONN_ST_CHG_REPLY */ {
4432 if (retcode >= SS_SUCCESS) {
4433 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4434 } else {
4435 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4436 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4437 drbd_set_st_err_str(retcode), retcode);
4438 }
4439 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004440 }
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004441 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004442}
4443
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004444static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004445{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004446 return drbd_send_ping_ack(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447
4448}
4449
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004450static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004451{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004452 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004453 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004454 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4455 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4456 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004457
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004458 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004459}
4460
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004461static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004462{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004463 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004464 sector_t sector = be64_to_cpu(p->sector);
4465 int blksize = be32_to_cpu(p->blksize);
4466
Philipp Reisner31890f42011-01-19 14:12:51 +01004467 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468
4469 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4470
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004471 if (get_ldev(mdev)) {
4472 drbd_rs_complete_io(mdev, sector);
4473 drbd_set_in_sync(mdev, sector, blksize);
4474 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4475 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4476 put_ldev(mdev);
4477 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004479 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004481 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004482}
4483
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004484static int
4485validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4486 struct rb_root *root, const char *func,
4487 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004488{
4489 struct drbd_request *req;
4490 struct bio_and_error m;
4491
Philipp Reisner87eeee42011-01-19 14:16:30 +01004492 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004493 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004494 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004495 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004496 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004497 }
4498 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004499 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004500
4501 if (m.bio)
4502 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004503 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004504}
4505
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004506static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004508 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004509 sector_t sector = be64_to_cpu(p->sector);
4510 int blksize = be32_to_cpu(p->blksize);
4511 enum drbd_req_event what;
4512
4513 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4514
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004515 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516 drbd_set_in_sync(mdev, sector, blksize);
4517 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004518 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004520 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004521 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004522 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004523 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004524 break;
4525 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004526 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004527 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004528 break;
4529 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004530 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004531 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004532 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004533 case P_DISCARD_WRITE:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004534 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004535 what = DISCARD_WRITE;
4536 break;
4537 case P_RETRY_WRITE:
4538 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
4539 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004540 break;
4541 default:
4542 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004543 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004544 }
4545
4546 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004547 &mdev->write_requests, __func__,
4548 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004549}
4550
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004551static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004552{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004553 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004554 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004555 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004556 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4557 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004558 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004559
4560 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4561
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004562 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563 dec_rs_pending(mdev);
4564 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004565 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004566 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004567
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004568 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004569 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004570 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004571 if (!found) {
4572 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4573 The master bio might already be completed, therefore the
4574 request is no longer in the collision hash. */
4575 /* In Protocol B we might already have got a P_RECV_ACK
4576 but then get a P_NEG_ACK afterwards. */
4577 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004578 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004579 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004580 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004581 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582}
4583
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004584static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004586 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 sector_t sector = be64_to_cpu(p->sector);
4588
4589 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004590
Philipp Reisnerb411b362009-09-25 16:07:19 -07004591 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4592 (unsigned long long)sector, be32_to_cpu(p->blksize));
4593
4594 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004595 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004596 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004597}
4598
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004599static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004600{
4601 sector_t sector;
4602 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004603 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004604
4605 sector = be64_to_cpu(p->sector);
4606 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004607
4608 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4609
4610 dec_rs_pending(mdev);
4611
4612 if (get_ldev_if_state(mdev, D_FAILED)) {
4613 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004614 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004615 case P_NEG_RS_DREPLY:
4616 drbd_rs_failed_io(mdev, sector, size);
4617 case P_RS_CANCEL:
4618 break;
4619 default:
4620 D_ASSERT(0);
4621 put_ldev(mdev);
4622 return false;
4623 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624 put_ldev(mdev);
4625 }
4626
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004627 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628}
4629
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004630static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004631{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004632 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004633
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004634 tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004636 if (mdev->state.conn == C_AHEAD &&
4637 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004638 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4639 mdev->start_resync_timer.expires = jiffies + HZ;
4640 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004641 }
4642
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004643 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644}
4645
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004646static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004647{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004648 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004649 struct drbd_work *w;
4650 sector_t sector;
4651 int size;
4652
4653 sector = be64_to_cpu(p->sector);
4654 size = be32_to_cpu(p->blksize);
4655
4656 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4657
4658 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4659 drbd_ov_oos_found(mdev, sector, size);
4660 else
4661 ov_oos_print(mdev);
4662
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004663 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004664 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004665
Philipp Reisnerb411b362009-09-25 16:07:19 -07004666 drbd_rs_complete_io(mdev, sector);
4667 dec_rs_pending(mdev);
4668
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004669 --mdev->ov_left;
4670
4671 /* let's advance progress step marks only for every other megabyte */
4672 if ((mdev->ov_left & 0x200) == 0x200)
4673 drbd_advance_rs_marks(mdev, mdev->ov_left);
4674
4675 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676 w = kmalloc(sizeof(*w), GFP_NOIO);
4677 if (w) {
4678 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004679 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004680 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004681 } else {
4682 dev_err(DEV, "kmalloc(w) failed.");
4683 ov_oos_print(mdev);
4684 drbd_resync_finished(mdev);
4685 }
4686 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004687 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004688 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004689}
4690
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004691static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004692{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004693 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004694}
4695
Philipp Reisner32862ec2011-02-08 16:41:01 +01004696static int tconn_process_done_ee(struct drbd_tconn *tconn)
4697{
Philipp Reisner082a3432011-03-15 16:05:42 +01004698 struct drbd_conf *mdev;
4699 int i, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004700
4701 do {
4702 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4703 flush_signals(current);
Philipp Reisner082a3432011-03-15 16:05:42 +01004704 idr_for_each_entry(&tconn->volumes, mdev, i) {
4705 if (!drbd_process_done_ee(mdev))
4706 return 1; /* error */
4707 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004708 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01004709
4710 spin_lock_irq(&tconn->req_lock);
4711 idr_for_each_entry(&tconn->volumes, mdev, i) {
4712 not_empty = !list_empty(&mdev->done_ee);
4713 if (not_empty)
4714 break;
4715 }
4716 spin_unlock_irq(&tconn->req_lock);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004717 } while (not_empty);
4718
4719 return 0;
4720}
4721
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004722struct asender_cmd {
4723 size_t pkt_size;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004724 enum mdev_or_conn fa_type; /* first argument's type */
4725 union {
4726 int (*mdev_fn)(struct drbd_conf *mdev, enum drbd_packet cmd);
4727 int (*conn_fn)(struct drbd_tconn *tconn, enum drbd_packet cmd);
4728 };
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004729};
4730
4731static struct asender_cmd asender_tbl[] = {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004732 [P_PING] = { sizeof(struct p_header), MDEV, { got_Ping } },
4733 [P_PING_ACK] = { sizeof(struct p_header), MDEV, { got_PingAck } },
4734 [P_RECV_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4735 [P_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4736 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4737 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
4738 [P_NEG_ACK] = { sizeof(struct p_block_ack), MDEV, { got_NegAck } },
4739 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegDReply } },
4740 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
4741 [P_OV_RESULT] = { sizeof(struct p_block_ack), MDEV, { got_OVResult } },
4742 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), MDEV, { got_BarrierAck } },
4743 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } },
4744 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), MDEV, { got_IsInSync } },
4745 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), MDEV, { got_skip } },
4746 [P_RS_CANCEL] = { sizeof(struct p_block_ack), MDEV, { got_NegRSDReply } },
4747 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), MDEV, { got_RqSReply } },
4748 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), MDEV, { got_BlockAck } },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004749};
4750
Philipp Reisnerb411b362009-09-25 16:07:19 -07004751int drbd_asender(struct drbd_thread *thi)
4752{
Philipp Reisner392c8802011-02-09 10:33:31 +01004753 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004754 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004755 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004756 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004757 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004758 void *buf = h;
4759 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004760 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004761 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004762
Philipp Reisnerb411b362009-09-25 16:07:19 -07004763 current->policy = SCHED_RR; /* Make this a realtime task! */
4764 current->rt_priority = 2; /* more important than all other tasks */
4765
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004766 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004767 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004768 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004769 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004770 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004771 goto reconnect;
4772 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004773 tconn->meta.socket->sk->sk_rcvtimeo =
4774 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004775 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004776 }
4777
Philipp Reisner32862ec2011-02-08 16:41:01 +01004778 /* TODO: conditionally cork; it may hurt latency if we cork without
4779 much to send */
4780 if (!tconn->net_conf->no_cork)
4781 drbd_tcp_cork(tconn->meta.socket);
Philipp Reisner082a3432011-03-15 16:05:42 +01004782 if (tconn_process_done_ee(tconn)) {
4783 conn_err(tconn, "tconn_process_done_ee() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01004784 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01004785 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004786 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004787 if (!tconn->net_conf->no_cork)
4788 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004789
4790 /* short circuit, recv_msg would return EINTR anyways. */
4791 if (signal_pending(current))
4792 continue;
4793
Philipp Reisner32862ec2011-02-08 16:41:01 +01004794 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4795 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004796
4797 flush_signals(current);
4798
4799 /* Note:
4800 * -EINTR (on meta) we got a signal
4801 * -EAGAIN (on meta) rcvtimeo expired
4802 * -ECONNRESET other side closed the connection
4803 * -ERESTARTSYS (on data) we got a signal
4804 * rv < 0 other than above: unexpected error!
4805 * rv == expected: full header or command
4806 * rv < expected: "woken" by signal during receive
4807 * rv == 0 : "connection shut down by peer"
4808 */
4809 if (likely(rv > 0)) {
4810 received += rv;
4811 buf += rv;
4812 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004813 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004814 goto reconnect;
4815 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004816 /* If the data socket received something meanwhile,
4817 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004818 if (time_after(tconn->last_received,
4819 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004820 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004821 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004822 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004823 goto reconnect;
4824 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004825 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004826 continue;
4827 } else if (rv == -EINTR) {
4828 continue;
4829 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004830 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004831 goto reconnect;
4832 }
4833
4834 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004835 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004836 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004837 cmd = &asender_tbl[pi.cmd];
4838 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004839 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004840 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004841 goto disconnect;
4842 }
4843 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004844 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004845 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004846 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004848 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004849 }
4850 if (received == expect) {
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004851 bool rv;
4852
4853 if (cmd->fa_type == CONN) {
4854 rv = cmd->conn_fn(tconn, pi.cmd);
4855 } else {
4856 struct drbd_conf *mdev = vnr_to_mdev(tconn, pi.vnr);
4857 rv = cmd->mdev_fn(mdev, pi.cmd);
4858 }
4859
4860 if (!rv)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004861 goto reconnect;
4862
Philipp Reisnera4fbda82011-03-16 11:13:17 +01004863 tconn->last_received = jiffies;
4864
Lars Ellenbergf36af182011-03-09 22:44:55 +01004865 /* the idle_timeout (ping-int)
4866 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004867 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004868 ping_timeout_active = 0;
4869
Philipp Reisnerb411b362009-09-25 16:07:19 -07004870 buf = h;
4871 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004872 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004873 cmd = NULL;
4874 }
4875 }
4876
4877 if (0) {
4878reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004879 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004880 }
4881 if (0) {
4882disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004883 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004884 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004885 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004886
Philipp Reisner32862ec2011-02-08 16:41:01 +01004887 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004888
4889 return 0;
4890}