/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Decoded header of a received packet: the command, the payload size,
 * and the volume number the packet applies to. */
struct packet_info {
	enum drbd_packet cmd;
	int size;
	int vnr;
};
56
/* Result of drbd_may_finish_epoch(): whether the epoch is still in use,
 * was destroyed, or was recycled for further use. */
enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
Philipp Reisner65d11ed2011-02-07 17:35:59 +010063static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010064static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010065static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070066
67static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Philipp Reisner00d56942011-02-09 18:09:48 +010068static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
Philipp Reisnerb411b362009-09-25 16:07:19 -070070
71#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
72
Lars Ellenberg45bb9122010-05-14 17:10:48 +020073/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
82static struct page *page_chain_del(struct page **head, int n)
83{
84 struct page *page;
85 struct page *tmp;
86
87 BUG_ON(!n);
88 BUG_ON(!head);
89
90 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020091
92 if (!page)
93 return NULL;
94
Lars Ellenberg45bb9122010-05-14 17:10:48 +020095 while (page) {
96 tmp = page_chain_next(page);
97 if (--n == 0)
98 break; /* found sufficient pages */
99 if (tmp == NULL)
100 /* insufficient pages, don't use any of them. */
101 return NULL;
102 page = tmp;
103 }
104
105 /* add end of list marker for the returned list */
106 set_page_private(page, 0);
107 /* actual return value, and adjustment of head */
108 page = *head;
109 *head = tmp;
110 return page;
111}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
116static struct page *page_chain_tail(struct page *page, int *len)
117{
118 struct page *tmp;
119 int i = 1;
120 while ((tmp = page_chain_next(page)))
121 ++i, page = tmp;
122 if (len)
123 *len = i;
124 return page;
125}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
138static void page_chain_add(struct page **head,
139 struct page *chain_first, struct page *chain_last)
140{
141#if 1
142 struct page *tmp;
143 tmp = page_chain_tail(chain_first, NULL);
144 BUG_ON(tmp != chain_last);
145#endif
146
147 /* add chain to head */
148 set_page_private(chain_last, (unsigned long)*head);
149 *head = chain_first;
150}
151
/* Try to grab @number pages: first from the global drbd_pp_pool, then,
 * failing that, freshly from the kernel with GFP_TRY.  Returns a chain
 * linked via page->private, or NULL if @number pages could not be
 * gathered; a partial fresh allocation is donated to the pool rather
 * than returned. */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* push the new page onto the front of the private chain */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
196
/* Move all leading entries of mdev->net_ee whose pages are no longer in
 * use onto @to_be_freed.  Caller is expected to hold the lock protecting
 * net_ee and to free the moved entries afterwards. */
static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
214
/* Collect the finished entries of net_ee under the request lock, then
 * free them (and their page chains) outside the lock. */
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}
227
228/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200229 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700230 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200231 * @number: number of pages requested
232 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700233 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * Tries to allocate number pages, first from our own page pool, then from
235 * the kernel, unless this allocation would exceed the max_buffers setting.
236 * Possibly retry until DRBD frees sufficient pages somewhere else.
237 *
238 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700239 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		/* try to reclaim pages held by completed net_ee entries
		 * before re-checking the max_buffers limit */
		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		/* interruptible: give up the wait if we were signalled */
		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		/* woken via wake_up(&drbd_pp_wait) when pages are freed */
		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the whole chain as in use by this device */
	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
277
278/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100279 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200280 * Either links the page chain back to the global pool,
281 * or returns all pages to the system. */
/* Give a page chain back: to the system if the pool already has plenty
 * of spare pages, otherwise onto the global drbd_pp_pool.  Adjusts the
 * pp_in_use / pp_in_use_by_net accounting (chosen by @is_net) and wakes
 * any waiters in drbd_pp_alloc(). */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	/* i is the chain length in either branch */
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
/* Allocate a peer request ("EE") for @data_size bytes at @sector,
 * including enough pool pages to hold the payload.  Subject to
 * DRBD_FAULT_AL_EE fault injection.  Returns NULL on failure. */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	/* round the payload size up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	/* retry (block) only if the caller allowed waiting */
	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
364
/* Free a peer request: release its optional digest, return its page
 * chain to the pool (accounted against pp_in_use_by_net if @is_net),
 * and give the object back to the mempool.  The request must have no
 * pending bios and must be unlinked from the interval tree (asserted). */
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
375
/* Splice @list empty under the request lock and free every peer request
 * that was on it.  Returns the number of requests freed.  Whether the
 * net accounting is used depends on @list being mdev->net_ee. */
int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}
393
394
Philipp Reisner32862ec2011-02-08 16:41:01 +0100395/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700396 * and receive_Barrier.
397 *
398 * Move entries from net_ee to done_ee, if ready.
399 * Grab done_ee, call all callbacks, free the entries.
400 * The callbacks typically send out ACKs.
401 */
/* Reclaim finished net_ee entries and run the completion callback of
 * every entry on done_ee.  Returns 1 as long as all callbacks succeeded
 * and the connection state is at least C_WF_REPORT_PARAMS, 0 otherwise. */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	/* grab both lists under one lock round-trip */
	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}
430
/* Wait uninterruptibly until @head becomes empty.  Caller must hold
 * req_lock; it is dropped around io_schedule() and re-acquired before
 * both re-checking the list and returning. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
445
/* Lock-taking wrapper around _drbd_wait_ee_list_empty() for callers
 * that do not already hold req_lock. */
void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
452
453/* see also kernel_accept; which is only present since 2.6.18.
454 * also we want to log which part of it failed, exactly */
/* Listen on @sock and accept one connection into *newsock.
 * On failure, *what is left pointing at the name of the step that
 * failed ("listen", "sock_create_lite", "accept") so the caller can
 * log it.  Returns 0 on success or a negative error. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	/* the lite socket has no ops yet; inherit them from the listener */
	(*newsock)->ops  = sock->ops;

out:
	return err;
}
483
/* Single sock_recvmsg() into @buf for up to @size bytes.  If @flags is
 * zero, MSG_WAITALL | MSG_NOSIGNAL is used.  Returns the number of
 * bytes received or a negative error. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* kernel-space buffer: temporarily lift the user-copy checks */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
505
/* Receive exactly @size bytes from the connection's data socket.
 * Any result other than @size (error, EOF, or a partial read after a
 * signal) forces the connection state to C_BROKEN_PIPE.  Returns the
 * sock_recvmsg() result. */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else	{
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	/* anything short of a complete message is treated as a broken pipe */
	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}
558
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/* Active side of connection establishment: create a TCP socket, bind it
 * to our configured address (port 0 so the kernel picks one), and try a
 * connect() to the peer.  Returns the socket on success, NULL on
 * failure.  "Expected" failures (timeout, peer not reachable yet) do
 * not change the connection state; unexpected errors force
 * C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo =  tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

       /* explicitly bind to the configured IP as source IP
	*  for the outgoing connections.
	*  This is needed for multihomed hosts and to be
	*  able to use lo: interfaces for drbd.
	* Make sure to use 0 as port number, so linux selects
	*  a free one dynamically.
	*/
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
/* Passive side of connection establishment: bind a listening socket to
 * our configured address and wait (try_connect_int, with +-28.5% random
 * jitter) for the peer to connect.  Returns the accepted socket or
 * NULL; unexpected errors force C_DISCONNECTING. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* EAGAIN/EINTR just mean "nobody connected in time"; only
		 * other errors are worth logging and a state change */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
704
/* Send the "first packet" @cmd (e.g. P_HAND_SHAKE_S, see drbd_connect)
 * on @sock, using the connection's pre-allocated send buffer header. */
static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}
711
/* Receive the peer's "first packet" header from @sock and return its
 * command.  Returns 0xffff if the read was short or the DRBD magic did
 * not match. */
static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
747
/* Per-volume hook run after the connection is established: reset the
 * packet sequence counters, select the state mutex (the connection-wide
 * cstate_mutex for agreed protocol < 100, else the device's own), and
 * send the initial sync parameters, sizes, uuids and state to the peer.
 * Returns 0 on success, non-zero if any send failed. */
static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	/* ok stays 1 only if every send succeeded */
	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);


	return !ok;
}
770
/*
 * Establish the two sockets (data and meta) of a connection and perform
 * the protocol handshake and optional peer authentication.
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *     -1 peer talks different language,
 *	  no point in trying again, please go standalone.
 *     -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	/* Refuse unless the connection state machine lets us start connecting. */
	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock = NULL;
	msock = NULL;

	/* Both peers connect and accept concurrently.  Each freshly made
	 * socket is assigned the first free role (data, then meta) and
	 * announced to the peer with a first-packet type. */
	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			/* Actively connected socket: claim the first missing role. */
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* Brief delay, then verify both sockets are still alive
			 * before declaring the pair usable. */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn);
		if (s) {
			/* Passively accepted socket: the peer's first packet
			 * tells us which role it plays on the peer's side. */
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					/* both sides connected at once; prefer
					 * the accepted socket */
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* retry only half of the time — presumably to
				 * break symmetric races with the peer */
				if (random32() & 1)
					goto retry;
			}
		}

		/* Bail out if we got disconnected or told to exit meanwhile. */
		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	/* data socket carries bulk traffic, meta socket latency-critical acks */
	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	/* Negotiate the protocol version; <= 0 propagates failure/retry. */
	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	/* Handshake done: now use the configured timeout on the data socket. */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	/* Send initial state/sync info for every configured volume. */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
940
Philipp Reisnerce243852011-02-07 17:27:47 +0100941static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100943 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100944 pi->cmd = be16_to_cpu(h->h80.command);
945 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100946 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100947 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100948 pi->cmd = be16_to_cpu(h->h95.command);
949 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
950 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200951 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100952 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200953 be32_to_cpu(h->h80.magic),
954 be16_to_cpu(h->h80.command),
955 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100956 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100958 return true;
959}
960
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100961static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100962{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100963 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100964 int r;
965
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100966 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100967 if (unlikely(r != sizeof(*h))) {
968 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100969 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100970 return false;
971 }
972
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100973 r = decode_header(tconn, h, pi);
974 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975
Philipp Reisner257d0af2011-01-26 12:15:29 +0100976 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977}
978
Philipp Reisner2451fc32010-08-24 13:43:11 +0200979static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980{
981 int rv;
982
983 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400984 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200985 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986 if (rv) {
987 dev_err(DEV, "local disk flush failed with status %d\n", rv);
988 /* would rather check on EOPNOTSUPP, but that is not reliable.
989 * don't try again for ANY return value != 0
990 * if (rv == -EOPNOTSUPP) */
991 drbd_bump_write_ordering(mdev, WO_drain_io);
992 }
993 put_ldev(mdev);
994 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700995}
996
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev: DRBD device.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 *
 * An epoch may be finished once it has a non-zero size, no active
 * requests, and a barrier number; finishing sends the barrier ack and
 * either frees the epoch or recycles the current one.  Finishing one
 * epoch may cascade into its successor, hence the loop.
 *
 * Returns FE_STILL_LIVE, FE_DESTROYED or FE_RECYCLED.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* Apply the event itself; EV_CLEANUP is a modifier bit. */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* Finish condition: has data, no active requests, and the
		 * barrier number arrived. */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending the barrier ack */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* Not the newest epoch: unlink and free it,
				 * then continue with its successor. */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* Newest epoch: reset it for reuse instead of
				 * freeing, and wake waiters on ee_wait. */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1068
1069/**
1070 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1071 * @mdev: DRBD device.
1072 * @wo: Write ordering method to try.
1073 */
1074void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1075{
1076 enum write_ordering_e pwo;
1077 static char *write_ordering_str[] = {
1078 [WO_none] = "none",
1079 [WO_drain_io] = "drain",
1080 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001081 };
1082
1083 pwo = mdev->write_ordering;
1084 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1086 wo = WO_drain_io;
1087 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1088 wo = WO_none;
1089 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001090 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1092}
1093
/**
 * drbd_submit_peer_request()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* bytes still to be mapped into bios */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_endio_sec;

	/* prepend to the single-linked list of bios built so far */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	/* `page` keeps its position across the goto above, so a new bio
	 * resumes the chain where the full one left off. */
	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* current bio is full; start another one */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	/* all-or-nothing: set the bio count before submitting any of them */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* release every bio built so far; none were submitted */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1186
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001187static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001188 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001189{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001190 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001191
1192 drbd_remove_interval(&mdev->write_requests, i);
1193 drbd_clear_interval(i);
1194
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001195 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001196 if (i->waiting)
1197 wake_up(&mdev->misc_wait);
1198}
1199
/* Handle a P_BARRIER packet: close the current epoch and, depending on
 * the write ordering method, open a fresh one.  Returns true on success. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	/* balanced by dec_unacked() in drbd_may_finish_epoch() */
	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */
			/* NOTE: the else is unbraced; when the allocation
			 * fails we deliberately fall into the drain path
			 * below. */

	case WO_bdev_flush:
	case WO_drain_io:
		/* drain all pending writes and flush the disk before a new
		 * epoch may start */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* no new epoch object: wait until the current one is empty
		 * and reuse it */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* install the newly allocated epoch as the current one */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1271
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receive one data block from the peer into a freshly allocated peer
 * request: optional integrity digest first, then the payload into the
 * request's page chain, then digest verification.  Returns NULL on any
 * failure (short read, bad size, allocation failure, digest mismatch). */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* digest size is 0 unless integrity checking is configured and the
	 * peer speaks protocol >= 87 */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		/* the digest precedes the payload on the wire */
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	data_size -= dgs;

	/* sanity-check the announced payload size */
	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* receive the payload, one page of the chain at a time */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
					rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		/* recompute the digest over the received data and compare */
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_bcast_ee(mdev, "digest failed",
					dgs, dig_in, dig_vv, peer_req);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1361
1362/* drbd_drain_block() just takes a data block
1363 * out of the socket input buffer, and discards it.
1364 */
1365static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1366{
1367 struct page *page;
1368 int rr, rv = 1;
1369 void *data;
1370
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001371 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001372 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001373
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001374 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375
1376 data = kmap(page);
1377 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001378 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001379 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1380 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001381 if (!signal_pending(current))
1382 dev_warn(DEV,
1383 "short read receiving data: read %d expected %d\n",
1384 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 break;
1386 }
1387 data_size -= rr;
1388 }
1389 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001390 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391 return rv;
1392}
1393
/* Receive a data reply directly into the pages of req->master_bio,
 * verifying the integrity digest if one is in use.
 * Returns 1 on success, 0 on any failure. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest size is 0 unless integrity checking is configured and the
	 * peer speaks protocol >= 87 */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		/* the digest precedes the payload on the wire */
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt.  if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* receive the payload segment by segment into the bio's pages */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			     kmap(bvec->bv_page)+bvec->bv_offset,
			     expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		/* recompute the digest over the received bio and compare */
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	D_ASSERT(data_size == 0);
	return 1;
}
1453
1454/* e_end_resync_block() is called via
1455 * drbd_process_done_ee() by asender only */
Philipp Reisner00d56942011-02-09 18:09:48 +01001456static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001458 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisner00d56942011-02-09 18:09:48 +01001459 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001460 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461 int ok;
1462
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001463 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001465 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1466 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1467 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468 } else {
1469 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001470 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001471
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001472 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 }
1474 dec_unacked(mdev);
1475
1476 return ok;
1477}
1478
/* Receive one resync data block from the peer and submit it as a local
 * write.  Returns true if the write was submitted, false on read-in or
 * submit failure.  On any exit path the ldev reference taken by the
 * caller is dropped here (directly on failure, or later in the endio
 * path on success) — hence the __releases(local) annotation. */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	/* Pull the payload off the socket into a peer request. */
	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* Queue on sync_ee before submitting, so completion can find it. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* Account these sectors as resync-caused backing device activity. */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* Undo the queueing done above before freeing the request. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1514
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001515static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001516find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1517 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001518{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001519 struct drbd_request *req;
1520
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001521 /* Request object according to our peer */
1522 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001523 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001524 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001525 if (!missing_ok) {
1526 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1527 (unsigned long)id, (unsigned long long)sector);
1528 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001529 return NULL;
1530}
1531
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001532static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1533 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001534{
1535 struct drbd_request *req;
1536 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001537 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001538 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539
1540 sector = be64_to_cpu(p->sector);
1541
Philipp Reisner87eeee42011-01-19 14:16:30 +01001542 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001543 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001544 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001545 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001546 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547
Bart Van Assche24c48302011-05-21 18:32:29 +02001548 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 * special casing it there for the various failure cases.
1550 * still no race with drbd_fail_pending_reads */
1551 ok = recv_dless_read(mdev, req, sector, data_size);
1552
1553 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001554 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 /* else: nothing. handled from drbd_disconnect...
1556 * I don't think we may complete this just yet
1557 * in case we are "on-disconnect: freeze" */
1558
1559 return ok;
1560}
1561
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001562static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1563 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564{
1565 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001567 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568
1569 sector = be64_to_cpu(p->sector);
1570 D_ASSERT(p->block_id == ID_SYNCER);
1571
1572 if (get_ldev(mdev)) {
1573 /* data is submitted to disk within recv_resync_read.
1574 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001575 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576 ok = recv_resync_read(mdev, sector, data_size);
1577 } else {
1578 if (__ratelimit(&drbd_ratelimit_state))
1579 dev_err(DEV, "Can not write resync data to local disk.\n");
1580
1581 ok = drbd_drain_block(mdev, data_size);
1582
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001583 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584 }
1585
Philipp Reisner778f2712010-07-06 11:14:00 +02001586 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1587
Philipp Reisnerb411b362009-09-25 16:07:19 -07001588 return ok;
1589}
1590
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread.
 *
 * Completion callback for a mirrored application write received from the
 * peer: sends the protocol-appropriate ack (protocol C only), removes the
 * conflict-detection interval, and closes out the write epoch. */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok = 1, pcmd;

	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync (as target), a successful write may
			 * double as a resync ack and mark the range in sync. */
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			ok &= drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		/* Balances inc_unacked() done in receive_Data() for PROT_C. */
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		/* Single-primary: the interval was never inserted. */
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return ok;
}
1631
Philipp Reisner00d56942011-02-09 18:09:48 +01001632static int e_send_discard_ack(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001633{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001634 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisner00d56942011-02-09 18:09:48 +01001635 struct drbd_conf *mdev = w->mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636 int ok = 1;
1637
Philipp Reisner89e58e72011-01-19 13:12:45 +01001638 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001639 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001640
Philipp Reisner87eeee42011-01-19 14:16:30 +01001641 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001642 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1643 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001644 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645
1646 dec_unacked(mdev);
1647
1648 return ok;
1649}
1650
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001651static bool seq_greater(u32 a, u32 b)
1652{
1653 /*
1654 * We assume 32-bit wrap-around here.
1655 * For 24-bit wrap-around, we would have to shift:
1656 * a <<= 8; b <<= 8;
1657 */
1658 return (s32)a - (s32)b > 0;
1659}
1660
1661static u32 seq_max(u32 a, u32 b)
1662{
1663 return seq_greater(a, b) ? a : b;
1664}
1665
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001666static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001667{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001668 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001669
1670 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001671 old_peer_seq = mdev->peer_seq;
1672 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001673 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001674 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001675 wake_up(&mdev->seq_wait);
1676}
1677
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
{
	DEFINE_WAIT(wait);
	unsigned int p_seq;
	long timeout;
	int ret = 0;
	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		/* We may proceed once our packet is at most one ahead of
		 * the last sequence number seen on the ack channel. */
		if (!seq_greater(packet_seq, mdev->peer_seq + 1))
			break;
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* Remember the value we slept on, so we can tell a wakeup
		 * with progress from a plain 30s timeout. */
		p_seq = mdev->peer_seq;
		spin_unlock(&mdev->peer_seq_lock);
		timeout = schedule_timeout(30*HZ);
		spin_lock(&mdev->peer_seq_lock);
		if (timeout == 0 && p_seq == mdev->peer_seq) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
			break;
		}
	}
	finish_wait(&mdev->seq_wait, &wait);
	/* If we are the logically next packet, advance peer_seq ourselves. */
	if (mdev->peer_seq+1 == packet_seq)
		mdev->peer_seq++;
	spin_unlock(&mdev->peer_seq_lock);
	return ret;
}
1730
Lars Ellenberg688593c2010-11-17 22:25:03 +01001731/* see also bio_flags_to_wire()
1732 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1733 * flags and back. We may replicate to other kernel versions. */
1734static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001735{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001736 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1737 (dpf & DP_FUA ? REQ_FUA : 0) |
1738 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1739 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001740}
1741
/* mirrored write.
 * Handle a P_DATA packet: a write replicated to us by the peer.
 * Reads the payload, performs conflict detection against local writes
 * (two-primaries setups), sends the protocol-appropriate ack, and
 * submits the write to the local disk. */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	int rw = WRITE;
	u32 dp_flags;

	if (!get_ldev(mdev)) {
		/* No local disk: still advance the peer sequence number so
		 * other receivers waiting in drbd_wait_peer_seq make progress,
		 * then NAK and drain the payload off the socket. */
		spin_lock(&mdev->peer_seq_lock);
		if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
			mdev->peer_seq++;
		spin_unlock(&mdev->peer_seq_lock);

		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return drbd_drain_block(mdev, data_size);
	}

	/* get_ldev(mdev) successful.
	 * Corresponding put_ldev done either below (on various errors),
	 * or in drbd_endio_sec, if we successfully submit the data at
	 * the end of this function. */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	/* Translate the wire flags into bio flags for the local submit. */
	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Attach this write to the current epoch for write ordering. */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	/* I'm the receiver, I do hold a net_cnt reference. */
	if (!mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
	} else {
		/* don't get the req_lock yet,
		 * we may sleep in drbd_wait_peer_seq */
		const int size = peer_req->i.size;
		const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
		DEFINE_WAIT(wait);
		int first;

		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);

		/* conflict detection and handling:
		 * 1. wait on the sequence number,
		 *    in case this data packet overtook ACK packets.
		 * 2. check for conflicting write requests.
		 *
		 * Note: for two_primaries, we are protocol C,
		 * so there cannot be any request that is DONE
		 * but still on the transfer log.
		 *
		 * if no conflicting request is found:
		 *    submit.
		 *
		 * if any conflicting request is found
		 * that has not yet been acked,
		 * AND I have the "discard concurrent writes" flag:
		 *	 queue (via done_ee) the P_DISCARD_ACK; OUT.
		 *
		 * if any conflicting request is found:
		 *	 block the receiver, waiting on misc_wait
		 *	 until no more conflicting requests are there,
		 *	 or we get interrupted (disconnect).
		 *
		 *	 we do not just write after local io completion of those
		 *	 requests, but only after req is done completely, i.e.
		 *	 we wait for the P_DISCARD_ACK to arrive!
		 *
		 *	 then proceed normally, i.e. submit.
		 */
		if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
			goto out_interrupted;

		spin_lock_irq(&mdev->tconn->req_lock);

		first = 1;
		for (;;) {
			struct drbd_interval *i;
			int have_unacked = 0;
			int have_conflict = 0;
			prepare_to_wait(&mdev->misc_wait, &wait,
				TASK_INTERRUPTIBLE);

			i = drbd_find_overlap(&mdev->write_requests, sector, size);
			if (i) {
				/* only ALERT on first iteration,
				 * we may be woken up early... */
				if (first)
					dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
					      " new: %llus +%u; pending: %llus +%u\n",
					      current->comm, current->pid,
					      i->local ? "local" : "remote",
					      (unsigned long long)sector, size,
					      (unsigned long long)i->sector, i->size);

				if (i->local) {
					struct drbd_request *req2;

					req2 = container_of(i, struct drbd_request, i);
					/* unacked local writes decide whether
					 * the discard-by-flag path applies */
					if (req2->rq_state & RQ_NET_PENDING)
						++have_unacked;
				}
				++have_conflict;
			}
			if (!have_conflict)
				break;

			/* Discard Ack only for the _first_ iteration */
			if (first && discard && have_unacked) {
				dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
				     (unsigned long long)sector);
				inc_unacked(mdev);
				peer_req->w.cb = e_send_discard_ack;
				list_add_tail(&peer_req->w.list, &mdev->done_ee);

				spin_unlock_irq(&mdev->tconn->req_lock);

				/* we could probably send that P_DISCARD_ACK ourselves,
				 * but I don't like the receiver using the msock */

				put_ldev(mdev);
				wake_asender(mdev->tconn);
				finish_wait(&mdev->misc_wait, &wait);
				return true;
			}

			if (signal_pending(current)) {
				spin_unlock_irq(&mdev->tconn->req_lock);
				finish_wait(&mdev->misc_wait, &wait);
				goto out_interrupted;
			}

			/* Indicate to wake up mdev->misc_wait upon completion. */
			i->waiting = true;

			spin_unlock_irq(&mdev->tconn->req_lock);
			if (first) {
				first = 0;
				dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
				     "sec=%llus\n", (unsigned long long)sector);
			} else if (discard) {
				/* we had none on the first iteration.
				 * there must be none now. */
				D_ASSERT(have_unacked == 0);
			}
			schedule();
			spin_lock_irq(&mdev->tconn->req_lock);
		}
		finish_wait(&mdev->misc_wait, &wait);

		/* No conflicts left: register our own interval so later
		 * local writes see this one. */
		drbd_insert_interval(&mdev->write_requests, &peer_req->i);
	}

	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* Ack semantics depend on the replication protocol. */
	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* Unwind the queueing and interval registration done above. */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
1958
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 *
 * Returns nonzero if resync should be throttled for @sector, 0 otherwise.
 */
int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{
	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;

	/* feature disabled? */
	if (mdev->sync_conf.c_min_rate == 0)
		return 0;

	/* Never throttle an extent that application IO is waiting on. */
	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&mdev->al_lock);

	/* Total sectors moved by the backing device, minus what our own
	 * resync submitted — the remainder is foreign (application) IO. */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&mdev->rs_sect_ev);

	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		mdev->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* online-verify tracks its own "left" counter */
		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
			rs_left = mdev->ov_left;
		else
			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;

		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = mdev->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* throttle only when we are already above the floor rate */
		if (dbdt > mdev->sync_conf.c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2024
2025
/*
 * receive_DataRequest() - service a read request from the peer.
 *
 * Handles application reads (P_DATA_REQUEST), resync reads
 * (P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST) and online-verify traffic
 * (P_OV_REQUEST, P_OV_REPLY).  The block-request header has already been
 * read into tconn->data.rbuf; @digest_size is the number of payload bytes
 * (a checksum digest) still pending on the socket for the P_CSUM_RS_REQUEST
 * and P_OV_REPLY cases.
 *
 * Returns true on success, false to trigger a re-connect.
 */
static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int digest_size)
{
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* sanity check the peer's request: positive, 512-byte aligned,
	 * and not larger than one bio's worth */
	if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}
	/* request must not reach past the end of our backing device */
	if (sector + (size>>9) > capacity) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}

	/* No usable local data: negotiate a negative reply per request type,
	 * then drain whatever payload is still on the wire. */
	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
		verb = 1;	/* verb != 0: log the (rate-limited) complaint below */
		switch (cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(mdev);
			/* report the block as in sync; we cannot verify it locally */
			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
				cmdname(cmd));
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(mdev, digest_size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	/* Select the completion callback and fault-injection type per request
	 * kind; some kinds bypass the resync throttling below. */
	switch (cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* digest follows the header on the wire; store it inline
		 * after the digest_info struct */
		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = digest_size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;	/* drbd_free_ee will kfree di */

		if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
			goto out_free_e;

		if (cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &mdev->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(mdev);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first OV request with a capable peer: initialize the
		 * online-verify bookkeeping and progress marks */
		if (mdev->ov_start_sector == ~(sector_t)0 &&
		    mdev->tconn->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			mdev->ov_start_sector = sector;
			mdev->ov_position = sector;
			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
			mdev->rs_total = mdev->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				mdev->rs_mark_left[i] = mdev->ov_left;
				mdev->rs_mark_time[i] = now;
			}
			dev_info(DEV, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
			cmdname(cmd));
		fault_type = DRBD_FAULT_MAX;
		goto out_free_e;
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(mdev, sector))
		goto out_free_e;

submit_for_resync:
	/* account the sectors as resync "events" for rate control */
	atomic_add(size >> 9, &mdev->rs_sect_ev);

submit:
	inc_unacked(mdev);	/* balanced by the ack sent from the w.cb callback */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add_tail(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	/* undo the list_add_tail above before freeing the request */
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2212
/*
 * drbd_asb_recover_0p() - after-split-brain recovery policy, zero primaries.
 *
 * Return:
 *   -1   discard the local data (this node becomes sync target)
 *    1   discard the remote data (peer becomes sync target)
 *  -100  no automatic decision (misconfiguration, disconnect policy,
 *        or the chosen strategy could not decide)
 *
 * Note the deliberate case fallthroughs: younger/older-pri fall back to
 * discard-zero-changes, which in turn falls back to discard-least-changes.
 */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	/* self/peer: bit 0 of the respective bitmap UUID — used to order the
	 * younger/older primary (NOTE(review): presumably set on the node
	 * that was primary more recently; confirm against UUID handling). */
	int self, peer, rv = -100;
	/* ch_*: per-node "amount of changes"; the side with fewer changes
	 * loses under discard-least-changes below */
	unsigned long ch_self, ch_peer;

	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* these policies only make sense with at least one primary */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* both unchanged: break the tie by who yielded in the
			 * concurrent-connect race */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* only fall through to least-changes when we got here via
		 * the younger/older-pri cases above */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2284
2285static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2286{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002287 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288
Philipp Reisner89e58e72011-01-19 13:12:45 +01002289 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002290 case ASB_DISCARD_YOUNGER_PRI:
2291 case ASB_DISCARD_OLDER_PRI:
2292 case ASB_DISCARD_LEAST_CHG:
2293 case ASB_DISCARD_LOCAL:
2294 case ASB_DISCARD_REMOTE:
2295 dev_err(DEV, "Configuration error.\n");
2296 break;
2297 case ASB_DISCONNECT:
2298 break;
2299 case ASB_CONSENSUS:
2300 hg = drbd_asb_recover_0p(mdev);
2301 if (hg == -1 && mdev->state.role == R_SECONDARY)
2302 rv = hg;
2303 if (hg == 1 && mdev->state.role == R_PRIMARY)
2304 rv = hg;
2305 break;
2306 case ASB_VIOLENTLY:
2307 rv = drbd_asb_recover_0p(mdev);
2308 break;
2309 case ASB_DISCARD_SECONDARY:
2310 return mdev->state.role == R_PRIMARY ? 1 : -1;
2311 case ASB_CALL_HELPER:
2312 hg = drbd_asb_recover_0p(mdev);
2313 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002314 enum drbd_state_rv rv2;
2315
2316 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2318 * we might be here in C_WF_REPORT_PARAMS which is transient.
2319 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002320 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2321 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002322 drbd_khelper(mdev, "pri-lost-after-sb");
2323 } else {
2324 dev_warn(DEV, "Successfully gave up primary role.\n");
2325 rv = hg;
2326 }
2327 } else
2328 rv = hg;
2329 }
2330
2331 return rv;
2332}
2333
2334static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2335{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002336 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002337
Philipp Reisner89e58e72011-01-19 13:12:45 +01002338 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002339 case ASB_DISCARD_YOUNGER_PRI:
2340 case ASB_DISCARD_OLDER_PRI:
2341 case ASB_DISCARD_LEAST_CHG:
2342 case ASB_DISCARD_LOCAL:
2343 case ASB_DISCARD_REMOTE:
2344 case ASB_CONSENSUS:
2345 case ASB_DISCARD_SECONDARY:
2346 dev_err(DEV, "Configuration error.\n");
2347 break;
2348 case ASB_VIOLENTLY:
2349 rv = drbd_asb_recover_0p(mdev);
2350 break;
2351 case ASB_DISCONNECT:
2352 break;
2353 case ASB_CALL_HELPER:
2354 hg = drbd_asb_recover_0p(mdev);
2355 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002356 enum drbd_state_rv rv2;
2357
Philipp Reisnerb411b362009-09-25 16:07:19 -07002358 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2359 * we might be here in C_WF_REPORT_PARAMS which is transient.
2360 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002361 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2362 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002363 drbd_khelper(mdev, "pri-lost-after-sb");
2364 } else {
2365 dev_warn(DEV, "Successfully gave up primary role.\n");
2366 rv = hg;
2367 }
2368 } else
2369 rv = hg;
2370 }
2371
2372 return rv;
2373}
2374
2375static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2376 u64 bits, u64 flags)
2377{
2378 if (!uuid) {
2379 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2380 return;
2381 }
2382 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2383 text,
2384 (unsigned long long)uuid[UI_CURRENT],
2385 (unsigned long long)uuid[UI_BITMAP],
2386 (unsigned long long)uuid[UI_HISTORY_START],
2387 (unsigned long long)uuid[UI_HISTORY_END],
2388 (unsigned long long)bits,
2389 (unsigned long long)flags);
2390}
2391
2392/*
2393 100 after split brain try auto recover
2394 2 C_SYNC_SOURCE set BitMap
2395 1 C_SYNC_SOURCE use BitMap
2396 0 no Sync
2397 -1 C_SYNC_TARGET use BitMap
2398 -2 C_SYNC_TARGET set BitMap
2399 -100 after split brain, disconnect
2400-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002401-1091 requires proto 91
2402-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002403 */
/*
 * drbd_uuid_compare() - decide sync direction from both nodes' UUID sets.
 *
 * Walks a fixed sequence of rules over our on-disk UUIDs and the peer's
 * (p_uuid); *rule_nr reports which rule decided, for logging.  Return
 * values are documented in the table above this function (1/-1 = bitmap
 * based sync source/target, 2/-2 = full sync, 0 = no sync, 100/-100 =
 * split brain, -1000 = unrelated data, -109x = protocol too old).
 *
 * May correct UUIDs in place (ours and the peer's copy) when it detects
 * a missed "resync finished" or lost P_SYNC_UUID packet.
 */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* bit 0 of each UUID is a flag, mask it off before comparing */
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* peer has no bitmap UUID but we do: we were sync source and
		 * missed the "resync finished" event */
		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* mirror image of the case above: we were sync target and the
		 * peer missed the resync finished event; fix our copy of the
		 * peer's UUIDs */
		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* both were primary: tie-break on who yielded in the
			 * concurrent-connect race */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	return -1000;
}
2583
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK (-1) on failure.

   Runs drbd_uuid_compare(), applies the configured after-split-brain
   recovery policies when needed, and maps the resulting sync decision
   (hg: >0 become sync source, <0 become sync target, 0 no resync;
   |hg| >= 2 means full sync) onto a connection state.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	int hg, rule_nr;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;

	mydisk = mdev->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = mdev->new_state_tmp.disk;

	dev_info(DEV, "drbd_sync_handshake:\n");
	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(mdev, &rule_nr);

	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		dev_alert(DEV, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* -109x encodes "peer must support at least protocol 9x" */
	if (hg < -1000) {
		dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* exactly one side is inconsistent: the consistent side must be the
	 * source, regardless of what the UUIDs said */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;	/* escalate to full sync */
		dev_info(DEV, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(mdev, "initial-split-brain");

	/* split brain: try the configured automatic recovery strategy,
	 * dispatched on the number of current primaries */
	if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
		int pcount = (mdev->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(mdev);
			break;
		case 1:
			hg = drbd_asb_recover_1p(mdev);
			break;
		case 2:
			hg = drbd_asb_recover_2p(mdev);
			break;
		}
		if (abs(hg) < 100) {
			dev_warn(DEV, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				dev_warn(DEV, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* still undecided: honor a manually configured "victim" flag
	 * (want_lose locally, UI_FLAGS bit 0 on the peer) */
	if (hg == -100) {
		if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			dev_warn(DEV, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(mdev, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* becoming sync target while primary with usable data: apply the
	 * configured rr-conflict policy */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
		switch (mdev->tconn->net_conf->rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(mdev, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* dry-run mode: report what would happen, then abort the connect */
	if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
		if (hg == 0)
			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(mdev)) {
			dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(mdev));
		}
	}

	return rv;
}
2730
2731/* returns 1 if invalid */
2732static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2733{
2734 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2735 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2736 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2737 return 0;
2738
2739 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2740 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2741 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2742 return 1;
2743
2744 /* everything else is valid if they are equal on both sides. */
2745 if (peer == self)
2746 return 0;
2747
2748 /* everything es is invalid. */
2749 return 1;
2750}
2751
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002752static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2753 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002754{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002755 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002757 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002758 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2759
Philipp Reisnerb411b362009-09-25 16:07:19 -07002760 p_proto = be32_to_cpu(p->protocol);
2761 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2762 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2763 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002764 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002765 cf = be32_to_cpu(p->conn_flags);
2766 p_want_lose = cf & CF_WANT_LOSE;
2767
2768 clear_bit(CONN_DRY_RUN, &mdev->flags);
2769
2770 if (cf & CF_DRY_RUN)
2771 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002772
Philipp Reisner89e58e72011-01-19 13:12:45 +01002773 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002774 dev_err(DEV, "incompatible communication protocols\n");
2775 goto disconnect;
2776 }
2777
Philipp Reisner89e58e72011-01-19 13:12:45 +01002778 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002779 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2780 goto disconnect;
2781 }
2782
Philipp Reisner89e58e72011-01-19 13:12:45 +01002783 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002784 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2785 goto disconnect;
2786 }
2787
Philipp Reisner89e58e72011-01-19 13:12:45 +01002788 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2790 goto disconnect;
2791 }
2792
Philipp Reisner89e58e72011-01-19 13:12:45 +01002793 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002794 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2795 goto disconnect;
2796 }
2797
Philipp Reisner89e58e72011-01-19 13:12:45 +01002798 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002799 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2800 goto disconnect;
2801 }
2802
Philipp Reisner31890f42011-01-19 14:12:51 +01002803 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002804 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002806 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002807 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002808
2809 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2810 if (strcmp(p_integrity_alg, my_alg)) {
2811 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2812 goto disconnect;
2813 }
2814 dev_info(DEV, "data-integrity-alg: %s\n",
2815 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2816 }
2817
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002818 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002819
2820disconnect:
2821 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002822 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823}
2824
2825/* helper function
2826 * input: alg name, feature name
2827 * return: NULL (alg name was "")
2828 * ERR_PTR(error) if something goes wrong
2829 * or the crypto hash ptr, if it worked out ok. */
2830struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2831 const char *alg, const char *name)
2832{
2833 struct crypto_hash *tfm;
2834
2835 if (!alg[0])
2836 return NULL;
2837
2838 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2839 if (IS_ERR(tfm)) {
2840 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2841 alg, name, PTR_ERR(tfm));
2842 return tfm;
2843 }
2844 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2845 crypto_free_hash(tfm);
2846 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2847 return ERR_PTR(-EINVAL);
2848 }
2849 return tfm;
2850}
2851
/*
 * receive_SyncParam() - handle an incoming sync-parameter packet
 *
 * Parses the resync parameters sent by the peer: the sync rate, the
 * verify-alg / csums-alg digest names (protocol >= 88/89), and - for
 * protocol >= 95 - the dynamic resync controller settings.  The exact
 * on-the-wire layout depends on the agreed protocol version.
 *
 * Returns true on success, false on a fatal receive/protocol error;
 * on incompatible algorithm settings the connection is forced to
 * C_DISCONNECTING via the disconnect label.
 */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* maximum acceptable packet size grows with the protocol version:
	 * apv <= 87: plain p_rs_param
	 * apv == 88: p_rs_param followed by one shared-secret sized string
	 * apv <= 94: p_rs_param_89 (embedded verify/csums alg names)
	 * apv >= 95: p_rs_param_95 (adds resync controller fields) */
	exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
			+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* split the payload into the fixed header part and the trailing
	 * variable-length part; only apv == 88 actually has trailing data
	 * (the NUL terminated verify-alg string) */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size = packet_size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size = packet_size - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	/* pull the fixed part of the packet off the socket */
	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	mdev->sync_conf.rate = be32_to_cpu(p->rate);

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg name as trailing data */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* a changed verify-alg is only acceptable while the initial
		 * parameter exchange (C_WF_REPORT_PARAMS) is still running */
		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* same for csums-alg (only exists since apv 89) */
		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94) {
			/* dynamic resync speed controller parameters */
			mdev->sync_conf.rate = be32_to_cpu(p->rate);
			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			/* pre-allocate the new plan fifo outside the lock;
			 * it is swapped in below under peer_seq_lock */
			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->verify_tfm);
			mdev->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->csums_tfm);
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
3003
Philipp Reisnerb411b362009-09-25 16:07:19 -07003004/* warn if the arguments differ by more than 12.5% */
3005static void warn_if_differ_considerably(struct drbd_conf *mdev,
3006 const char *s, sector_t a, sector_t b)
3007{
3008 sector_t d;
3009 if (a == 0 || b == 0)
3010 return;
3011 d = (a > b) ? (a - b) : (b - a);
3012 if (d > (a>>3) || d > (b>>3))
3013 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3014 (unsigned long long)a, (unsigned long long)b);
3015}
3016
/*
 * receive_sizes() - handle an incoming P_SIZES packet
 *
 * The peer reports its backing device size (d_size), the user-requested
 * size (u_size), its current capacity (c_size), its max bio size, and
 * resize flags.  We negotiate a common device size, possibly resize our
 * own device, and, if our size changed, report our new sizes back.
 *
 * Returns true on success, false on a fatal inconsistency (in which
 * case the connection is forced to C_DISCONNECTING).
 */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* neither side has backing storage: nothing we could sync */
	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
		dev_err(DEV, "some backing storage is needed\n");
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember our own setting, so we can restore it below if
		 * the negotiated size turns out to be unacceptable */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	/* note whether the lower level device size changed under us */
	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				/* cannot resync now; flag it for after the
				 * next state negotiation */
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3120
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003121static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3122 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003123{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003124 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003125 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003126 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003127
Philipp Reisnerb411b362009-09-25 16:07:19 -07003128 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3129
3130 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3131 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3132
3133 kfree(mdev->p_uuid);
3134 mdev->p_uuid = p_uuid;
3135
3136 if (mdev->state.conn < C_CONNECTED &&
3137 mdev->state.disk < D_INCONSISTENT &&
3138 mdev->state.role == R_PRIMARY &&
3139 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3140 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3141 (unsigned long long)mdev->ed_uuid);
3142 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003143 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144 }
3145
3146 if (get_ldev(mdev)) {
3147 int skip_initial_sync =
3148 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003149 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3151 (p_uuid[UI_FLAGS] & 8);
3152 if (skip_initial_sync) {
3153 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3154 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003155 "clear_n_write from receive_uuids",
3156 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3158 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3159 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3160 CS_VERBOSE, NULL);
3161 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003162 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003163 }
3164 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003165 } else if (mdev->state.disk < D_INCONSISTENT &&
3166 mdev->state.role == R_PRIMARY) {
3167 /* I am a diskless primary, the peer just created a new current UUID
3168 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003169 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003170 }
3171
3172 /* Before we test for the disk state, we should wait until an eventually
3173 ongoing cluster wide state change is finished. That is important if
3174 we are primary and are detaching from our disk. We need to see the
3175 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003176 mutex_lock(mdev->state_mutex);
3177 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003178 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003179 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3180
3181 if (updated_uuids)
3182 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003183
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003184 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003185}
3186
3187/**
3188 * convert_state() - Converts the peer's view of the cluster state to our point of view
3189 * @ps: The state as seen by the peer.
3190 */
3191static union drbd_state convert_state(union drbd_state ps)
3192{
3193 union drbd_state ms;
3194
3195 static enum drbd_conns c_tab[] = {
3196 [C_CONNECTED] = C_CONNECTED,
3197
3198 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3199 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3200 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3201 [C_VERIFY_S] = C_VERIFY_T,
3202 [C_MASK] = C_MASK,
3203 };
3204
3205 ms.i = ps.i;
3206
3207 ms.conn = c_tab[ps.conn];
3208 ms.peer = ps.role;
3209 ms.role = ps.peer;
3210 ms.pdsk = ps.disk;
3211 ms.disk = ps.pdsk;
3212 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3213
3214 return ms;
3215}
3216
/*
 * receive_req_state() - handle a state change request from the peer
 *
 * Translates the requested mask/val pair into our point of view and
 * applies it, either connection-wide (P_CONN_ST_CHG_REQ) or on this
 * device only, then sends the resulting status back to the peer.
 *
 * Always returns true; a refused request is reported to the peer via
 * the state-reply packet, not via the return value.
 */
static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* if we are the side that discards on concurrent requests, and a
	 * local state change is in flight, refuse the peer's request */
	if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* the request is phrased from the peer's point of view */
	mask = convert_state(mask);
	val = convert_state(val);

	if (cmd == P_CONN_ST_CHG_REQ) {
		rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
		conn_send_sr_reply(mdev->tconn, rv);
	} else {
		rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
		drbd_send_sr_reply(mdev, rv);
	}

	drbd_md_sync(mdev);

	return true;
}
3248
/*
 * receive_state() - handle an incoming P_STATE packet
 *
 * Merges the peer's reported state into our own: decides whether a
 * resync is needed (drbd_sync_handshake), reconciles transient
 * disagreements about disk states during sync start/finish, and
 * commits the resulting state under req_lock with an optimistic
 * retry loop (the local state may change while we evaluate it
 * unlocked).
 *
 * Returns true on success, false on a fatal disagreement (the
 * connection is then forced to C_DISCONNECTING / C_PROTOCOL_ERROR).
 */
static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_state *p = &mdev->tconn->data.rbuf.state;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* peer is still attaching; derive its effective disk state
		 * from the UUID flags it sent us earlier */
		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&mdev->tconn->req_lock);
 retry:
	/* snapshot our state; we re-check below that it did not change
	 * while we were evaluating it without the lock */
	os = ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* peer says his disk is uptodate, while we think it is inconsistent,
	 * and this happens while we think we have a sync going on. */
	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
				drbd_resync_finished(mdev);
			return true;
		}
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);

		put_ldev(mdev);
		/* C_MASK from the handshake means: no common ancestor found */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (mdev->state.disk == D_NEGOTIATING) {
				drbd_force_state(mdev, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
					return false;
				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
				return false;
			}
		}
	}

	spin_lock_irq(&mdev->tconn->req_lock);
	/* our state changed behind our back: re-evaluate from scratch */
	if (mdev->state.i != os.i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &mdev->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(mdev);
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
		return false;
	}
	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS) {
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(mdev);
			drbd_send_state(mdev);
		}
	}

	mdev->tconn->net_conf->want_lose = 0;

	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */

	return true;
}
3401
/*
 * receive_sync_uuid() - handle an incoming P_SYNC_UUID packet
 *
 * The peer (the sync source) tells us the UUID it generated for this
 * resync.  We wait until our state has settled into a sync-target-ish
 * state (or the connection/disk went away), then adopt that UUID as
 * our current one and start resync as target.
 *
 * Always returns true; an unexpected packet is only logged.
 */
static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;

	wait_event(mdev->misc_wait,
		   mdev->state.conn == C_WF_SYNC_UUID ||
		   mdev->state.conn == C_BEHIND ||
		   mdev->state.conn < C_CONNECTED ||
		   mdev->state.disk < D_NEGOTIATING);

	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);

		drbd_print_uuids(mdev, "updated sync uuid");
		drbd_start_resync(mdev, C_SYNC_TARGET);

		put_ldev(mdev);
	} else
		dev_err(DEV, "Ignoring SyncUUID packet!\n");

	return true;
}
3430
/**
 * receive_bitmap_plain
 *
 * Receive one chunk of an uncompressed ("plain") bitmap transfer from
 * the socket into @buffer and merge it into our bitmap, advancing the
 * transfer context @c.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
		     unsigned long *buffer, struct bm_xfer_ctx *c)
{
	/* at most one packet's worth, at most what is left to transfer */
	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
	unsigned want = num_words * sizeof(long);
	int err;

	/* the peer must send exactly as many bytes as we expect */
	if (want != data_size) {
		dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv(mdev->tconn, buffer, want);
	if (err != want) {
		/* a short read without error code still is a failure */
		if (err >= 0)
			err = -EIO;
		return err;
	}

	drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);

	/* advance the cursor; clamp the bit offset at the bitmap end */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
3467
/**
 * recv_bm_rle_bits
 *
 * Decode a VLI run-length-encoded compressed bitmap chunk of @len bytes and
 * set the corresponding out-of-sync bits.  Runs alternate between "set" and
 * "clear"; only "set" runs modify the bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* decode window of up to 64 bits */
	u64 rl;			/* current run length */
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = DCBP_get_start(p);	/* whether the first run is "set" */
	int have;		/* valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* a run must never extend past the end of the bitmap */
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		/* the decoded code word must fit in what we had buffered */
		if (have < bits) {
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* consume the decoded bits and refill the window */
		look_ahead >>= bits;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* not yet at the last bit => another packet is needed */
	return (s != c->bm_bits);
}
3532
/**
 * decode_bitmap_c
 *
 * Dispatch on the compressed bitmap encoding; currently only RLE_VLI_Bits
 * is supported.  An unknown encoding forces the connection into
 * C_PROTOCOL_ERROR.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
decode_bitmap_c(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (DCBP_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(mdev, p, c, len);

	/* other variants had been implemented for evaluation,
	 * but have been dropped as this one turned out to be "best"
	 * during all our tests. */

	dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
	return -EIO;
}
3556
3557void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3558 const char *direction, struct bm_xfer_ctx *c)
3559{
3560 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003561 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003562 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3563 + c->bm_words * sizeof(long);
3564 unsigned total = c->bytes[0] + c->bytes[1];
3565 unsigned r;
3566
3567 /* total can not be zero. but just in case: */
3568 if (total == 0)
3569 return;
3570
3571 /* don't report if not compressed */
3572 if (total >= plain)
3573 return;
3574
3575 /* total < plain. check for overflow, still */
3576 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3577 : (1000 * total / plain);
3578
3579 if (r > 1000)
3580 r = 1000;
3581
3582 r = 1000 - r;
3583 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3584 "total %u; compression: %u.%u%%\n",
3585 direction,
3586 c->bytes[1], c->packets[1],
3587 c->bytes[0], c->packets[0],
3588 total, r/10, r % 10);
3589}
3590
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer	 = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* keep receiving bitmap packets (plain or compressed) until the
	 * decoder reports completion (err == 0) or an error occurs */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			/* prepend the already-received header so p is a full packet */
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* account per encoding: index 1 = plain, index 0 = compressed */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		/* more to come: fetch the next packet header ourselves */
		if (!drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* sync target: answer with our own bitmap, then move on */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);
	return ok;
}
3693
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003694static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3695 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696{
3697 /* TODO zero copy sink :) */
3698 static char sink[128];
3699 int size, want, r;
3700
Philipp Reisner02918be2010-08-20 14:35:10 +02003701 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3702 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703
Philipp Reisner02918be2010-08-20 14:35:10 +02003704 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705 while (size > 0) {
3706 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003707 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003708 if (!expect(r > 0))
3709 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710 size -= r;
3711 }
3712 return size == 0;
3713}
3714
/* Handle P_UNPLUG_REMOTE: no payload; just make sure the peer's pending
 * TCP data is acknowledged promptly. */
static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
				unsigned int data_size)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(mdev->tconn->data.socket);

	return true;
}
3724
/* Handle P_OUT_OF_SYNC: the peer tells us a block range is out of sync;
 * mark it so in our bitmap.  Expected only in the listed connection
 * states, but the packet is applied regardless (the assert only logs). */
static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;

	switch (mdev->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
			break;
	default:
		/* unexpected state: log loudly, but still process the packet */
		dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(mdev->state.conn));
	}

	drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return true;
}
3744
/* Signature shared by all data-socket packet handlers: device, packet type,
 * and the number of payload bytes still to be received. */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
				  unsigned int to_receive);

/* One dispatch-table entry per packet type on the data socket. */
struct data_cmd {
	int expect_payload;	/* nonzero if the packet may carry extra payload */
	size_t pkt_size;	/* fixed (sub-)header size to read before dispatch */
	drbd_cmd_handler_f function;	/* handler, NULL = not handled here */
};

/* Dispatch table for the receiver (drbdd), indexed by packet type. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header), receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
	/* anything missing from this table is in
	 * the asender_tbl, see get_asender_cmd */
	[P_MAX_CMD]	    = { 0, 0, NULL },
};
3782
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003789
/* Main receive loop of the receiver thread: read packet headers from the
 * data socket and dispatch each packet to its handler via drbd_cmd_handler[].
 * Any protocol violation or handler failure forces C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_tconn *tconn)
{
	struct p_header *header = &tconn->data.rbuf.header;
	struct packet_info pi;
	size_t shs; /* sub header size */
	int rv;

	while (get_t_state(&tconn->receiver) == RUNNING) {
		drbd_thread_current_set_cpu(&tconn->receiver);
		if (!drbd_recv_header(tconn, &pi))
			goto err_out;

		/* reject packet types we have no handler for */
		if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
			conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
			goto err_out;
		}

		/* fixed-size part of the packet beyond the common header */
		shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
		if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
			conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			rv = drbd_recv(tconn, &header->payload, shs);
			if (unlikely(rv != shs)) {
				if (!signal_pending(current))
					conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
				goto err_out;
			}
		}

		/* hand off to the per-packet handler; remaining payload size
		 * is the announced size minus the sub-header already read */
		rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);

		if (unlikely(!rv)) {
			conn_err(tconn, "error receiving %s, l: %d!\n",
			    cmdname(pi.cmd), pi.size);
			goto err_out;
		}
	}

	/* reached only via the gotos above, never by falling through */
	if (0) {
	err_out:
		conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	}
}
3836
/* Wait until all work currently queued on the connection's work queue has
 * been processed, by queueing a barrier work item and waiting for its
 * completion. */
void drbd_flush_workqueue(struct drbd_conf *mdev)
{
	struct drbd_wq_barrier barr;

	barr.w.cb = w_prev_work_done;
	barr.w.mdev = mdev;
	init_completion(&barr.done);
	drbd_queue_work(&mdev->tconn->data.work, &barr.w);
	wait_for_completion(&barr.done);
}
3847
/* Tear down a connection: stop the asender, free the sockets, run the
 * per-volume cleanup (drbd_disconnected) for every volume, and move the
 * connection state towards C_UNCONNECTED / C_STANDALONE. */
static void drbd_disconnect(struct drbd_tconn *tconn)
{
	enum drbd_conns oc;
	/* NOTE(review): rv receives the state-change result below but is not
	 * examined afterwards in this function. */
	int rv = SS_UNKNOWN_ERROR;

	if (tconn->cstate == C_STANDALONE)
		return;

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* per-volume teardown for every volume on this connection */
	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);

	conn_info(tconn, "Connection closed\n");

	spin_lock_irq(&tconn->req_lock);
	oc = tconn->cstate;
	if (oc >= C_UNCONNECTED)
		rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&tconn->req_lock);

	if (oc == C_DISCONNECTING) {
		/* wait for all users of net_conf to drop their reference
		 * before freeing it */
		wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);

		crypto_free_hash(tconn->cram_hmac_tfm);
		tconn->cram_hmac_tfm = NULL;

		kfree(tconn->net_conf);
		tconn->net_conf = NULL;
		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
	}
}
3882
/* Per-volume cleanup performed on disconnect; used as the idr_for_each
 * callback from drbd_disconnect.  @p is the drbd_conf of the volume.
 * Always returns 0 so iteration continues over all volumes. */
static int drbd_disconnected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	enum drbd_fencing_p fp;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(mdev);
	mdev->rs_total = 0;
	mdev->rs_failed = 0;
	atomic_set(&mdev->rs_pending_cnt, 0);
	wake_up(&mdev->misc_wait);

	del_timer(&mdev->request_timer);

	/* make sure syncer is stopped and w_resume_next_sg queued */
	del_timer_sync(&mdev->resync_timer);
	resync_timer_fn((unsigned long)mdev);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(mdev);

	/* This also does reclaim_net_ee().  If we do this too early, we might
	 * miss some resync ee and pages.*/
	drbd_process_done_ee(mdev);

	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;

	if (!is_susp(mdev->state))
		tl_clear(mdev);

	drbd_md_sync(mdev);

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* primary with a fencing policy and an unknown-or-better peer disk:
	 * try to outdate the peer asynchronously */
	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
		drbd_try_outdate_peer_async(mdev);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* tcp_close and release of sendpage pages can be deferred.  I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_release_ee(mdev, &mdev->net_ee);
	if (i)
		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&mdev->pp_in_use_by_net);
	if (i)
		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&mdev->pp_in_use);
	if (i)
		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));

	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&mdev->current_epoch->epoch_size, 0);
	D_ASSERT(list_empty(&mdev->current_epoch->list));

	return 0;
}
3976
/*
 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
 * we can agree on is stored in agreed_pro_version.
 *
 * feature flags and the reserved array should be enough room for future
 * enhancements of the handshake protocol, and possible plugins...
 *
 * for now, they are expected to be zero, but ignored.
 */
static int drbd_send_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == mdev->tconn->receiver ... */
	struct p_handshake *p = &tconn->data.sbuf.handshake;
	int ok;

	/* the send buffer is shared; serialize against other senders */
	if (mutex_lock_interruptible(&tconn->data.mutex)) {
		conn_err(tconn, "interrupted during initial handshake\n");
		return 0; /* interrupted. not ok. */
	}

	/* socket may already be gone (e.g. connection torn down) */
	if (tconn->data.socket == NULL) {
		mutex_unlock(&tconn->data.mutex);
		return 0;
	}

	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
			    &p->head, sizeof(*p), 0);
	mutex_unlock(&tconn->data.mutex);
	return ok;
}
4010
/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */
static int drbd_do_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == tconn->receiver ... */
	struct p_handshake *p = &tconn->data.rbuf.handshake;
	/* expected payload size: handshake packet minus its 8.0-style header */
	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
	struct packet_info pi;
	int rv;

	/* send our own handshake first */
	rv = drbd_send_handshake(tconn);
	if (!rv)
		return 0;

	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		return 0;

	/* the very first packet from the peer must be the handshake */
	if (pi.cmd != P_HAND_SHAKE) {
		conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
		     cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		conn_err(tconn, "expected HandShake length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	rv = drbd_recv(tconn, &p->head.payload, expect);

	if (rv != expect) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
		return 0;
	}

	/* convert from wire byte order; an old peer may send max == 0 */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* our [PRO_VERSION_MIN, PRO_VERSION_MAX] must intersect the peer's range */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	conn_info(tconn, "Handshake successful: "
	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);

	return 1;

 incompat:
	conn_err(tconn, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4077
4078#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Fallback used when the kernel was built without CONFIG_CRYPTO_HMAC:
 * cram-hmac authentication cannot work, so always refuse.
 *
 * Returns -1 (auth failed, don't try again), matching the contract of the
 * real implementation in the #else branch.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	/* Only a tconn is in scope here -- the DEV/mdev based dev_err()
	 * of the original cannot resolve in this branch; log on the
	 * connection like every other tconn-level function in this file. */
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4085#else
4086#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004087
4088/* Return value:
4089 1 - auth succeeded,
4090 0 - failed, try again (network error),
4091 -1 - auth failed, don't try again.
4092*/
4093
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	/* Key the HMAC transform with the configured shared secret. */
	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* Send our random challenge to the peer... */
	get_random_bytes(my_challenge, CHALLENGE_LEN);

	rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* ...and expect the peer's own challenge in return. */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Bound the peer-supplied payload length before allocating for it. */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	/* rv == 0 here means "network error, caller may retry"; see the
	 * return-value comment above this function. */
	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* HMAC the peer's challenge and send the digest back as our response. */
	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	/* Now receive the peer's response to *our* challenge. */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* The response must be exactly one HMAC digest long. */
	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* "response" buffer is reused to receive the peer's answer; our own
	 * response has already been sent out above. */
	rv = drbd_recv(tconn, response , resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* Compute what the correct response to our challenge would be... */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* ...and compare it with what the peer actually sent. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
			  resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditionally free all three buffers. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4236#endif
4237
4238int drbdd_init(struct drbd_thread *thi)
4239{
Philipp Reisner392c8802011-02-09 10:33:31 +01004240 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241 int h;
4242
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004243 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244
4245 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004246 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004248 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004249 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004250 }
4251 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004252 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004253 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 }
4255 } while (h == 0);
4256
4257 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004258 if (get_net_conf(tconn)) {
4259 drbdd(tconn);
4260 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004261 }
4262 }
4263
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004264 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004265
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004266 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267 return 0;
4268}
4269
4270/* ********* acknowledge sender ******** */
4271
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004272static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004273{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004274 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004275 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004276
4277 int retcode = be32_to_cpu(p->retcode);
4278
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004279 if (cmd == P_STATE_CHG_REPLY) {
4280 if (retcode >= SS_SUCCESS) {
4281 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4282 } else {
4283 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4284 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4285 drbd_set_st_err_str(retcode), retcode);
4286 }
4287 wake_up(&mdev->state_wait);
4288 } else /* conn == P_CONN_ST_CHG_REPLY */ {
4289 if (retcode >= SS_SUCCESS) {
4290 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4291 } else {
4292 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4293 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4294 drbd_set_st_err_str(retcode), retcode);
4295 }
4296 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 }
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004298 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004299}
4300
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004301static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004303 return drbd_send_ping_ack(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004304
4305}
4306
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004307static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004309 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004311 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4312 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4313 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004315 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316}
4317
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004318static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004320 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004321 sector_t sector = be64_to_cpu(p->sector);
4322 int blksize = be32_to_cpu(p->blksize);
4323
Philipp Reisner31890f42011-01-19 14:12:51 +01004324 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325
4326 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4327
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004328 if (get_ldev(mdev)) {
4329 drbd_rs_complete_io(mdev, sector);
4330 drbd_set_in_sync(mdev, sector, blksize);
4331 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4332 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4333 put_ldev(mdev);
4334 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004336 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004338 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004339}
4340
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004341static int
4342validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4343 struct rb_root *root, const char *func,
4344 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345{
4346 struct drbd_request *req;
4347 struct bio_and_error m;
4348
Philipp Reisner87eeee42011-01-19 14:16:30 +01004349 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004350 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004352 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004353 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354 }
4355 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004356 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004357
4358 if (m.bio)
4359 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004360 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361}
4362
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004363static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004365 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004366 sector_t sector = be64_to_cpu(p->sector);
4367 int blksize = be32_to_cpu(p->blksize);
4368 enum drbd_req_event what;
4369
4370 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4371
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004372 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004373 drbd_set_in_sync(mdev, sector, blksize);
4374 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004375 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004376 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004377 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004379 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004380 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004381 break;
4382 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004383 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004384 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004385 break;
4386 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004387 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004388 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004389 break;
4390 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004391 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004392 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004393 break;
4394 default:
4395 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004396 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397 }
4398
4399 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004400 &mdev->write_requests, __func__,
4401 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402}
4403
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004404static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004405{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004406 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004408 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004409 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4410 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004411 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004412
4413 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4414
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004415 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004416 dec_rs_pending(mdev);
4417 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004418 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004420
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004421 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004422 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004423 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004424 if (!found) {
4425 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4426 The master bio might already be completed, therefore the
4427 request is no longer in the collision hash. */
4428 /* In Protocol B we might already have got a P_RECV_ACK
4429 but then get a P_NEG_ACK afterwards. */
4430 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004431 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004432 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004433 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004434 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004435}
4436
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004437static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004438{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004439 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004440 sector_t sector = be64_to_cpu(p->sector);
4441
4442 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4443 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4444 (unsigned long long)sector, be32_to_cpu(p->blksize));
4445
4446 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004447 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004448 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449}
4450
/*
 * Negative reply to a resync data request (P_NEG_RS_DREPLY), or a resync
 * request cancellation (P_RS_CANCEL).  Both complete the resync I/O for
 * the sector; only the negative reply also records it as failed.
 */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through: both cases complete via the break below */
		case P_RS_CANCEL:
			break;
		default:
			/* unexpected command for this handler */
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4481
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004482static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004483{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004484 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004485
4486 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4487
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004488 if (mdev->state.conn == C_AHEAD &&
4489 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004490 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4491 mdev->start_resync_timer.expires = jiffies + HZ;
4492 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004493 }
4494
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004495 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004496}
4497
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004498static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004500 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004501 struct drbd_work *w;
4502 sector_t sector;
4503 int size;
4504
4505 sector = be64_to_cpu(p->sector);
4506 size = be32_to_cpu(p->blksize);
4507
4508 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4509
4510 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4511 drbd_ov_oos_found(mdev, sector, size);
4512 else
4513 ov_oos_print(mdev);
4514
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004515 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004516 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004517
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518 drbd_rs_complete_io(mdev, sector);
4519 dec_rs_pending(mdev);
4520
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004521 --mdev->ov_left;
4522
4523 /* let's advance progress step marks only for every other megabyte */
4524 if ((mdev->ov_left & 0x200) == 0x200)
4525 drbd_advance_rs_marks(mdev, mdev->ov_left);
4526
4527 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004528 w = kmalloc(sizeof(*w), GFP_NOIO);
4529 if (w) {
4530 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004531 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004532 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533 } else {
4534 dev_err(DEV, "kmalloc(w) failed.");
4535 ov_oos_print(mdev);
4536 drbd_resync_finished(mdev);
4537 }
4538 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004539 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004540 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004541}
4542
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004543static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004544{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004545 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004546}
4547
/* One entry of the asender (meta socket) dispatch table: the expected
 * on-the-wire packet size and the handler that processes the packet. */
struct asender_cmd {
	size_t pkt_size;	/* full packet size incl. header, checked by drbd_asender() */
	int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
};
4552
4553static struct asender_cmd *get_asender_cmd(int cmd)
4554{
4555 static struct asender_cmd asender_tbl[] = {
4556 /* anything missing from this table is in
4557 * the drbd_cmd_handler (drbd_default_handler) table,
4558 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004559 [P_PING] = { sizeof(struct p_header), got_Ping },
4560 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004561 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4562 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4563 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4564 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4565 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4566 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4567 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4568 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4569 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4570 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4571 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004572 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004573 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004574 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004575 [P_MAX_CMD] = { 0, NULL },
4576 };
4577 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4578 return NULL;
4579 return &asender_tbl[cmd];
4580}
4581
/* idr_for_each() callback: drain the done_ee list of one volume.
 * Returns non-zero when drbd_process_done_ee() reports failure. */
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = p;

	return drbd_process_done_ee(mdev) ? 0 : 1;
}
4587
4588static int _check_ee_empty(int vnr, void *p, void *data)
4589{
4590 struct drbd_conf *mdev = (struct drbd_conf *)p;
4591 struct drbd_tconn *tconn = mdev->tconn;
4592 int not_empty;
4593
4594 spin_lock_irq(&tconn->req_lock);
4595 not_empty = !list_empty(&mdev->done_ee);
4596 spin_unlock_irq(&tconn->req_lock);
4597
4598 return not_empty;
4599}
4600
/*
 * Drain the done_ee lists of all volumes of this connection, looping
 * until every list is observed empty.
 *
 * Returns 0 on success, or the non-zero error from
 * _drbd_process_done_ee() when processing a volume failed.
 *
 * NOTE(review): SIGNAL_ASENDER is cleared (and pending signals flushed)
 * before each processing pass and set again before the emptiness
 * re-check -- presumably so the asender's signal does not interrupt
 * drbd_process_done_ee() but can interrupt the subsequent receive;
 * confirm against the drbd_asender() loop before relying on this.
 */
static int tconn_process_done_ee(struct drbd_tconn *tconn)
{
	int not_empty, err;

	do {
		clear_bit(SIGNAL_ASENDER, &tconn->flags);
		flush_signals(current);
		err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
		if (err)
			return err;
		set_bit(SIGNAL_ASENDER, &tconn->flags);
		/* entries may have been added meanwhile; re-check all volumes */
		not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
	} while (not_empty);

	return 0;
}
4617
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618int drbd_asender(struct drbd_thread *thi)
4619{
Philipp Reisner392c8802011-02-09 10:33:31 +01004620 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004621 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004623 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004624 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625 void *buf = h;
4626 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004627 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004628 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004629
Philipp Reisnerb411b362009-09-25 16:07:19 -07004630 current->policy = SCHED_RR; /* Make this a realtime task! */
4631 current->rt_priority = 2; /* more important than all other tasks */
4632
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004633 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004634 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004635 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004636 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004637 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004638 goto reconnect;
4639 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004640 tconn->meta.socket->sk->sk_rcvtimeo =
4641 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004642 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643 }
4644
Philipp Reisner32862ec2011-02-08 16:41:01 +01004645 /* TODO: conditionally cork; it may hurt latency if we cork without
4646 much to send */
4647 if (!tconn->net_conf->no_cork)
4648 drbd_tcp_cork(tconn->meta.socket);
4649 if (tconn_process_done_ee(tconn))
4650 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004651 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004652 if (!tconn->net_conf->no_cork)
4653 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654
4655 /* short circuit, recv_msg would return EINTR anyways. */
4656 if (signal_pending(current))
4657 continue;
4658
Philipp Reisner32862ec2011-02-08 16:41:01 +01004659 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4660 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004661
4662 flush_signals(current);
4663
4664 /* Note:
4665 * -EINTR (on meta) we got a signal
4666 * -EAGAIN (on meta) rcvtimeo expired
4667 * -ECONNRESET other side closed the connection
4668 * -ERESTARTSYS (on data) we got a signal
4669 * rv < 0 other than above: unexpected error!
4670 * rv == expected: full header or command
4671 * rv < expected: "woken" by signal during receive
4672 * rv == 0 : "connection shut down by peer"
4673 */
4674 if (likely(rv > 0)) {
4675 received += rv;
4676 buf += rv;
4677 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004678 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004679 goto reconnect;
4680 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004681 /* If the data socket received something meanwhile,
4682 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004683 if (time_after(tconn->last_received,
4684 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004685 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004686 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004687 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688 goto reconnect;
4689 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004690 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004691 continue;
4692 } else if (rv == -EINTR) {
4693 continue;
4694 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004695 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004696 goto reconnect;
4697 }
4698
4699 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004700 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004701 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004702 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004703 if (unlikely(cmd == NULL)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004704 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004705 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004706 goto disconnect;
4707 }
4708 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004709 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004710 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004711 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004712 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004713 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004714 }
4715 if (received == expect) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004716 tconn->last_received = jiffies;
4717 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004718 goto reconnect;
4719
Lars Ellenbergf36af182011-03-09 22:44:55 +01004720 /* the idle_timeout (ping-int)
4721 * has been restored in got_PingAck() */
4722 if (cmd == get_asender_cmd(P_PING_ACK))
4723 ping_timeout_active = 0;
4724
Philipp Reisnerb411b362009-09-25 16:07:19 -07004725 buf = h;
4726 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004727 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004728 cmd = NULL;
4729 }
4730 }
4731
4732 if (0) {
4733reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004734 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735 }
4736 if (0) {
4737disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004738 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004739 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004740 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741
Philipp Reisner32862ec2011-02-08 16:41:01 +01004742 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004743
4744 return 0;
4745}