blob: 57691a3b8f3baa5f85ac48f7179b18160afba7b8 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
/* Decoded wire-packet header, as parsed off the socket. */
struct packet_info {
	enum drbd_packet cmd;	/* decoded command code */
	int size;		/* payload size in bytes */
	int vnr;		/* presumably the addressed volume number — confirm against callers */
};
56
Philipp Reisnerb411b362009-09-25 16:07:19 -070057enum finish_epoch {
58 FE_STILL_LIVE,
59 FE_DESTROYED,
60 FE_RECYCLED,
61};
62
Philipp Reisner65d11ed2011-02-07 17:35:59 +010063static int drbd_do_handshake(struct drbd_tconn *tconn);
Philipp Reisner13e60372011-02-08 09:54:40 +010064static int drbd_do_auth(struct drbd_tconn *tconn);
Philipp Reisner360cc742011-02-08 14:29:53 +010065static int drbd_disconnected(int vnr, void *p, void *data);
Philipp Reisnerb411b362009-09-25 16:07:19 -070066
67static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
Philipp Reisner00d56942011-02-09 18:09:48 +010068static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070069
Philipp Reisnerb411b362009-09-25 16:07:19 -070070
71#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
72
Lars Ellenberg45bb9122010-05-14 17:10:48 +020073/*
74 * some helper functions to deal with single linked page lists,
75 * page->private being our "next" pointer.
76 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
82static struct page *page_chain_del(struct page **head, int n)
83{
84 struct page *page;
85 struct page *tmp;
86
87 BUG_ON(!n);
88 BUG_ON(!head);
89
90 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020091
92 if (!page)
93 return NULL;
94
Lars Ellenberg45bb9122010-05-14 17:10:48 +020095 while (page) {
96 tmp = page_chain_next(page);
97 if (--n == 0)
98 break; /* found sufficient pages */
99 if (tmp == NULL)
100 /* insufficient pages, don't use any of them. */
101 return NULL;
102 page = tmp;
103 }
104
105 /* add end of list marker for the returned list */
106 set_page_private(page, 0);
107 /* actual return value, and adjustment of head */
108 page = *head;
109 *head = tmp;
110 return page;
111}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock.
 * Optionally reports the chain length through @len. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next = page_chain_next(page);
	int count = 1;

	while (next) {
		page = next;
		next = page_chain_next(page);
		count++;
	}
	if (len)
		*len = count;
	return page;
}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
/* Prepend the chain [@chain_first .. @chain_last] to *head.
 * Locking is the responsibility of the caller. */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia: verify that chain_last really is the tail of chain_first */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
151
/* Try to grab @number pages: first in one go from the preallocated
 * drbd_pp_pool, else page by page via alloc_page(GFP_TRY).
 * Returns the chain on success; on partial failure, returns any pages
 * already allocated to the pool and returns NULL (caller retries). */
static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		/* link the fresh page in front of the chain built so far */
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
196
/* Move the leading run of finished entries (no active pages left) from
 * mdev->net_ee onto @to_be_freed.  Both visible callers hold
 * mdev->tconn->req_lock around this; it must not be called unlocked. */
static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}
214
/* Collect finished net_ee entries under the req_lock, then free them
 * outside the lock. */
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}
227
228/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200229 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700230 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200231 * @number: number of pages requested
232 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700233 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * Tries to allocate number pages, first from our own page pool, then from
235 * the kernel, unless this allocation would exceed the max_buffers setting.
236 * Possibly retry until DRBD frees sufficient pages somewhere else.
237 *
238 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700239 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200240static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700241{
242 struct page *page = NULL;
243 DEFINE_WAIT(wait);
244
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200245 /* Yes, we may run up to @number over max_buffers. If we
246 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100247 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200248 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700249
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700251 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
252
253 drbd_kick_lo_and_reclaim_net(mdev);
254
Philipp Reisner89e58e72011-01-19 13:12:45 +0100255 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200256 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257 if (page)
258 break;
259 }
260
261 if (!retry)
262 break;
263
264 if (signal_pending(current)) {
265 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
266 break;
267 }
268
269 schedule();
270 }
271 finish_wait(&drbd_pp_wait, &wait);
272
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200273 if (page)
274 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700275 return page;
276}
277
/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	/* pick the accounting counter this chain was charged against */
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	/* Pool already well filled: free to the system; otherwise re-link
	 * the chain into drbd_pp_pool.  Either branch leaves i = number of
	 * pages in the chain. */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
/* Allocate a peer-request ("EE") descriptor plus enough pool pages to
 * cover @data_size bytes.  @id is stored opaquely in block_id.
 * Returns NULL on allocation failure or injected fault. */
struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	/* round data_size up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	/* only retry/block in drbd_pp_alloc if the caller allowed waiting */
	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
364
/* Free one peer request: digest (if any), its page chain (accounted
 * against pp_in_use_by_net when @is_net), and the descriptor itself.
 * Asserts the request has no bios in flight and is off the interval tree. */
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
375
/* Splice @list out under the req_lock, then free every entry outside it.
 * Returns the number of entries freed. */
int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries are accounted in pp_in_use_by_net */
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}
393
394
/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 * Returns the accumulated success of all callbacks.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	/* start out "ok" only if we are at least exchanging parameters */
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}
430
/* Wait until @head is empty.  Must be entered with the req_lock held;
 * the lock is dropped while sleeping and re-acquired before returning. */
void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}
445
/* Locked wrapper: takes the req_lock around _drbd_wait_ee_list_empty(). */
void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}
452
/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly.
 * @what is set to the name of the step that failed, for error reporting.
 * On success, *newsock holds the accepted connection. */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	/* the accepted socket inherits the ops of the listening socket */
	(*newsock)->ops  = sock->ops;

out:
	return err;
}
483
/* Single sock_recvmsg() into the kernel buffer @buf.
 * With @flags == 0, blocks for the full @size (MSG_WAITALL|MSG_NOSIGNAL).
 * Returns what sock_recvmsg() returns: byte count or negative error. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	/* buf is a kernel address; temporarily lift the user-copy check */
	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}
505
/* Receive exactly @size bytes on the connection's data socket.
 * Any shortfall (error, signal, peer close) is logged and forces the
 * connection state to C_BROKEN_PIPE.  Returns the sock_recvmsg() result
 * of the last attempt (== size on success). */
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	/* buf is a kernel address; temporarily lift the user-copy check */
	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on  sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}
558
/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.  A value of 0 leaves the kernel default. */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
		unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
577
/* Actively connect to the peer address in net_conf.
 * Returns the connected socket, or NULL on failure.  Transient errors
 * (timeout, peer unreachable, signal) just return NULL so the caller
 * retries; any other error forces C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;	/* name of the failing step, for the error log */
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
/* Passively wait for the peer: create a listening socket on our
 * configured address (timeout jittered so both nodes do not collide
 * forever) and accept one connection.
 * Returns the established socket, or NULL. */
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;	/* name of the failing step, for the error log */

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* EAGAIN/EINTR/ERESTARTSYS are expected (timeout, signal);
		 * anything else is a real failure */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}
	put_net_conf(tconn);

	return s_estab;
}
704
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100705static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100707 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100709 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700710}
711
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100712static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700713{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100714 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715 int rr;
716
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100717 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700718
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100719 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700720 return be16_to_cpu(h->command);
721
722 return 0xffff;
723}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
747
/* Per-volume setup once the connection is established: reset sequence
 * counters, select the state mutex, and send our parameters and state
 * to the peer.  Returns 0 if all sends succeeded, 1 otherwise. */
static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	/* protocol < 100: serialize through the connection-wide cstate_mutex;
	 * newer peers get a per-device state mutex */
	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);


	return !ok;
}
770
/*
 * return values:
 * 1 yes, we have a valid connection
 * 0 oops, did not work out, please try again
 * -1 peer talks different language,
 * no point in trying again, please go standalone.
 * -2 We do not have a network config...
 */
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock = NULL;
	msock = NULL;

	/* Establish TWO tcp connections: one for data, one for meta data
	 * ("msock").  Both sides actively connect AND listen; whichever
	 * side wins the race sends the first packet (P_HAND_SHAKE_S/_M)
	 * that tells the peer which role the socket will play. */
	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			/* fill the data socket first, then the meta socket */
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			/* give the peer a moment, then re-verify both sockets */
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		/* accept an incoming connection initiated by the peer */
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			/* return values ignored on purpose: this only frees
			 * sockets that have died in the meantime */
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				/* both sides connected simultaneously; one of
				 * them must yield on concurrent writes */
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				/* randomize to break livelock between peers */
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	/* negotiate the protocol version; <= 0 means incompatible or error */
	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	/* handshake done: restore the regular data-socket timeouts */
	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	/* per-volume initialization; nonzero from the iterator means failure */
	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}
940
Philipp Reisnerce243852011-02-07 17:27:47 +0100941static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100943 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100944 pi->cmd = be16_to_cpu(h->h80.command);
945 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100946 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100947 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100948 pi->cmd = be16_to_cpu(h->h95.command);
949 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
950 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200951 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100952 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200953 be32_to_cpu(h->h80.magic),
954 be16_to_cpu(h->h80.command),
955 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100956 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100958 return true;
959}
960
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100961static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100962{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100963 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100964 int r;
965
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100966 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100967 if (unlikely(r != sizeof(*h))) {
968 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100969 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100970 return false;
971 }
972
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100973 r = decode_header(tconn, h, pi);
974 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975
Philipp Reisner257d0af2011-01-26 12:15:29 +0100976 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977}
978
Philipp Reisner2451fc32010-08-24 13:43:11 +0200979static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980{
981 int rv;
982
983 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400984 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200985 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986 if (rv) {
987 dev_err(DEV, "local disk flush failed with status %d\n", rv);
988 /* would rather check on EOPNOTSUPP, but that is not reliable.
989 * don't try again for ANY return value != 0
990 * if (rv == -EOPNOTSUPP) */
991 drbd_bump_write_ordering(mdev, WO_drain_io);
992 }
993 put_ldev(mdev);
994 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700995}
996
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 *
 * An epoch is finished when it is non-empty, has no active requests
 * left, and its barrier number has arrived.  Finishing one epoch may
 * cascade into the next one on the list, hence the loop.
 * Returns FE_STILL_LIVE, FE_DESTROYED or FE_RECYCLED.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier flag */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* epoch complete: non-empty, quiescent, barrier number known */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending on the network */
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				/* older epoch: unlink and free it, then
				 * re-examine its successor on the list */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* newest epoch: reset it for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}
1068
1069/**
1070 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1071 * @mdev: DRBD device.
1072 * @wo: Write ordering method to try.
1073 */
1074void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1075{
1076 enum write_ordering_e pwo;
1077 static char *write_ordering_str[] = {
1078 [WO_none] = "none",
1079 [WO_drain_io] = "drain",
1080 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001081 };
1082
1083 pwo = mdev->write_ordering;
1084 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1086 wo = WO_drain_io;
1087 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1088 wo = WO_none;
1089 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001090 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1092}
1093
/**
 * drbd_submit_peer_request()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_conf *mdev,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain via bi_next */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* remaining bytes to map */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submission order below is reversed, which
	 * is fine since the bios cover disjoint sectors */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* current bio is full; continue with a fresh one */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	/* the endio callback decrements this; last one completes the request */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* nothing was submitted yet; just drop all allocated bios */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1190
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001191static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001192 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001193{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001194 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001195
1196 drbd_remove_interval(&mdev->write_requests, i);
1197 drbd_clear_interval(i);
1198
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001199 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001200 if (i->waiting)
1201 wake_up(&mdev->misc_wait);
1202}
1203
/* Handle an incoming P_BARRIER packet: note the barrier number on the
 * current epoch and, depending on the write ordering policy, either
 * allocate a fresh epoch or wait/flush until the current one drains.
 * Returns true on success, false on a fatal inconsistency. */
static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	/* the barrier ack we owe the peer; paired with dec_unacked() inside
	 * drbd_may_finish_epoch() once the epoch completes */
	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* drain and flush before letting the epoch roll over */
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		/* no new epoch needed (or allocatable): wait until the
		 * current one is fully drained, then reuse it */
		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	/* install the freshly allocated epoch as the new current epoch */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}
1275
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
/* Receive @data_size bytes (optionally preceded by an integrity digest)
 * from the data socket into a freshly allocated peer request.
 * Returns the peer request, or NULL on short read, bad size, access
 * beyond end of device, allocation failure, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	/* digest present only when both sides agreed on data integrity
	 * checking (protocol >= 87 and an integrity transform configured) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	data_size -= dgs;

	/* payload must be a nonzero multiple of 512-byte sectors and must
	 * not exceed the maximum bio size */
	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* receive the payload one page of the page chain at a time */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
				rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		/* verify the received payload against the received digest */
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}
1363
1364/* drbd_drain_block() just takes a data block
1365 * out of the socket input buffer, and discards it.
1366 */
1367static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1368{
1369 struct page *page;
1370 int rr, rv = 1;
1371 void *data;
1372
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001373 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001374 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001375
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001376 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001377
1378 data = kmap(page);
1379 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001380 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001381 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1382 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001383 if (!signal_pending(current))
1384 dev_warn(DEV,
1385 "short read receiving data: read %d expected %d\n",
1386 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001387 break;
1388 }
1389 data_size -= rr;
1390 }
1391 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001392 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001393 return rv;
1394}
1395
/* Receive a "diskless read" reply directly into the pages of the
 * original request's master bio, optionally verifying an integrity
 * digest.  Returns 1 on success, 0 on short read or digest mismatch. */
static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec *bvec;
	struct bio *bio;
	int dgs, rr, i, expect;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;

	/* digest present only when both sides agreed on data integrity
	 * checking (protocol >= 87 and an integrity transform configured) */
	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data reply digest: read %d expected %d\n",
					rr, dgs);
			return 0;
		}
	}

	data_size -= dgs;

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	mdev->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(sector == bio->bi_sector);

	/* receive straight into the bio's pages, segment by segment */
	bio_for_each_segment(bvec, bio, i) {
		expect = min_t(int, data_size, bvec->bv_len);
		rr = drbd_recv(mdev->tconn,
			     kmap(bvec->bv_page)+bvec->bv_offset,
			     expect);
		kunmap(bvec->bv_page);
		if (rr != expect) {
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data reply: "
					"read %d expected %d\n",
					rr, expect);
			return 0;
		}
		data_size -= rr;
	}

	if (dgs) {
		/* recompute the digest over the bio and compare */
		drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
			return 0;
		}
	}

	D_ASSERT(data_size == 0);
	return 1;
}
1455
1456/* e_end_resync_block() is called via
1457 * drbd_process_done_ee() by asender only */
Philipp Reisner00d56942011-02-09 18:09:48 +01001458static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001460 struct drbd_peer_request *peer_req =
1461 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001462 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001463 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464 int ok;
1465
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001466 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001467
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001468 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1469 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1470 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001471 } else {
1472 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001473 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001474
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001475 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476 }
1477 dec_unacked(mdev);
1478
1479 return ok;
1480}
1481
/* Receive one resync data block from the socket and submit it as a WRITE
 * to the local disk.
 * Called with an ldev reference held (annotated __releases(local)): the
 * reference is dropped here on the failure path, otherwise in the bio
 * completion path (e_end_resync_block via drbd_process_done_ee).
 * Returns true if the request was submitted, false on any error. */
static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
{
	struct drbd_peer_request *peer_req;

	/* read the payload from the data socket into a new peer request */
	peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
	if (!peer_req)
		goto fail;

	dec_rs_pending(mdev);

	inc_unacked(mdev);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->sync_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* account incoming resync sectors; consumed by
	 * drbd_rs_should_slow_down() */
	atomic_add(data_size >> 9, &mdev->rs_sect_ev);
	if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	drbd_free_ee(mdev, peer_req);
fail:
	put_ldev(mdev);
	return false;
}
1517
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001518static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001519find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1520 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001521{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001522 struct drbd_request *req;
1523
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001524 /* Request object according to our peer */
1525 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001526 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001527 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001528 if (!missing_ok) {
1529 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1530 (unsigned long)id, (unsigned long long)sector);
1531 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001532 return NULL;
1533}
1534
/* Handle a data reply packet: the peer answered one of our application
 * reads.  Locate the original request via the echoed block_id and copy
 * the payload into its master bio (recv_dless_read).
 * Returns false on protocol error or short read. */
static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct drbd_request *req;
	sector_t sector;
	int ok;
	struct p_data *p = &mdev->tconn->data.rbuf.data;

	sector = be64_to_cpu(p->sector);

	/* validate the peer-supplied request pointer under the request lock */
	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (unlikely(!req))
		return false;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	ok = recv_dless_read(mdev, req, sector, data_size);

	if (ok)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return ok;
}
1564
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001565static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1566 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567{
1568 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001570 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571
1572 sector = be64_to_cpu(p->sector);
1573 D_ASSERT(p->block_id == ID_SYNCER);
1574
1575 if (get_ldev(mdev)) {
1576 /* data is submitted to disk within recv_resync_read.
1577 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001578 * or in drbd_peer_request_endio. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579 ok = recv_resync_read(mdev, sector, data_size);
1580 } else {
1581 if (__ratelimit(&drbd_ratelimit_state))
1582 dev_err(DEV, "Can not write resync data to local disk.\n");
1583
1584 ok = drbd_drain_block(mdev, data_size);
1585
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001586 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587 }
1588
Philipp Reisner778f2712010-07-06 11:14:00 +02001589 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1590
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 return ok;
1592}
1593
/* Worker callback: resubmit a postponed (conflicting) local write.
 * The request's master_bio and start_time are extracted under the request
 * lock, the old request object is retired via DISCARD_WRITE (postponed
 * requests do not complete their master_bio), and the bio is fed to
 * __drbd_make_request again as if it had just arrived. */
static int w_restart_write(struct drbd_work *w, int cancel)
{
	struct drbd_request *req = container_of(w, struct drbd_request, w);
	struct drbd_conf *mdev = w->mdev;
	struct bio *bio;
	unsigned long start_time;
	unsigned long flags;

	spin_lock_irqsave(&mdev->tconn->req_lock, flags);
	if (!expect(req->rq_state & RQ_POSTPONED)) {
		spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);
		return 0;
	}
	bio = req->master_bio;
	start_time = req->start_time;
	/* Postponed requests will not have their master_bio completed! */
	__req_mod(req, DISCARD_WRITE, NULL);
	spin_unlock_irqrestore(&mdev->tconn->req_lock, flags);

	/* __drbd_make_request() may ask us to retry until it accepts the bio */
	while (__drbd_make_request(mdev, bio, start_time))
		/* retry */ ;
	return 1;
}
1617
1618static void restart_conflicting_writes(struct drbd_conf *mdev,
1619 sector_t sector, int size)
1620{
1621 struct drbd_interval *i;
1622 struct drbd_request *req;
1623
1624 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1625 if (!i->local)
1626 continue;
1627 req = container_of(i, struct drbd_request, i);
1628 if (req->rq_state & RQ_LOCAL_PENDING ||
1629 !(req->rq_state & RQ_POSTPONED))
1630 continue;
1631 if (expect(list_empty(&req->w.list))) {
1632 req->w.mdev = mdev;
1633 req->w.cb = w_restart_write;
1634 drbd_queue_work(&mdev->tconn->data.work, &req->w);
1635 }
1636 }
1637}
1638
/* e_end_block() is called via drbd_process_done_ee().
 * this means this function only runs in the asender thread
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_conf *mdev = w->mdev;
	sector_t sector = peer_req->i.sector;
	int ok = 1, pcmd;

	/* Protocol C requires an explicit write ack once the data hit
	 * stable storage; during resync a successful write may instead be
	 * acked with P_RS_WRITE_ACK and marked in sync right away. */
	if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
				mdev->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			ok &= drbd_send_ack(mdev, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(mdev, sector, peer_req->i.size);
		} else {
			ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(mdev);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (mdev->tconn->net_conf->two_primaries) {
		spin_lock_irq(&mdev->tconn->req_lock);
		D_ASSERT(!drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(mdev, peer_req);
		/* any local writes that were postponed behind this peer
		 * request can be restarted now (see handle_write_conflicts) */
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(mdev, sector, peer_req->i.size);
		spin_unlock_irq(&mdev->tconn->req_lock);
	} else
		D_ASSERT(drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return ok;
}
1682
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001683static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001685 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001686 struct drbd_peer_request *peer_req =
1687 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001688 int ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001689
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001690 ok = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691 dec_unacked(mdev);
1692
1693 return ok;
1694}
1695
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001696static int e_send_discard_write(struct drbd_work *w, int unused)
1697{
1698 return e_send_ack(w, P_DISCARD_WRITE);
1699}
1700
1701static int e_send_retry_write(struct drbd_work *w, int unused)
1702{
1703 struct drbd_tconn *tconn = w->mdev->tconn;
1704
1705 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1706 P_RETRY_WRITE : P_DISCARD_WRITE);
1707}
1708
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001709static bool seq_greater(u32 a, u32 b)
1710{
1711 /*
1712 * We assume 32-bit wrap-around here.
1713 * For 24-bit wrap-around, we would have to shift:
1714 * a <<= 8; b <<= 8;
1715 */
1716 return (s32)a - (s32)b > 0;
1717}
1718
1719static u32 seq_max(u32 a, u32 b)
1720{
1721 return seq_greater(a, b) ? a : b;
1722}
1723
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001724static bool need_peer_seq(struct drbd_conf *mdev)
1725{
1726 struct drbd_tconn *tconn = mdev->tconn;
1727
1728 /*
1729 * We only need to keep track of the last packet_seq number of our peer
1730 * if we are in dual-primary mode and we have the discard flag set; see
1731 * handle_write_conflicts().
1732 */
1733 return tconn->net_conf->two_primaries &&
1734 test_bit(DISCARD_CONCURRENT, &tconn->flags);
1735}
1736
/* Fold a newly received packet sequence number into mdev->peer_seq
 * (monotonically, honoring 32-bit wrap-around via seq_max()) and wake
 * any thread sleeping on seq_wait in wait_for_and_update_peer_seq().
 * No-op unless need_peer_seq() says we track peer sequence numbers. */
static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
{
	unsigned int newest_peer_seq;

	if (need_peer_seq(mdev)) {
		spin_lock(&mdev->peer_seq_lock);
		newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
		mdev->peer_seq = newest_peer_seq;
		spin_unlock(&mdev->peer_seq_lock);
		/* wake up only if we actually changed mdev->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&mdev->seq_wait);
	}
}
1751
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than mdev->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update mdev->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
{
	DEFINE_WAIT(wait);
	long timeout;
	int ret;

	if (!need_peer_seq(mdev))
		return 0;

	spin_lock(&mdev->peer_seq_lock);
	for (;;) {
		/* we may proceed once peer_seq is at most one ahead of
		 * what we have already seen */
		if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
			mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
			ret = 0;
			break;
		}
		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}
		/* drop the lock while sleeping; update_peer_seq() wakes us
		 * via seq_wait when mdev->peer_seq advances */
		prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&mdev->peer_seq_lock);
		timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
		timeout = schedule_timeout(timeout);
		spin_lock(&mdev->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&mdev->peer_seq_lock);
	finish_wait(&mdev->seq_wait, &wait);
	return ret;
}
1808
Lars Ellenberg688593c2010-11-17 22:25:03 +01001809/* see also bio_flags_to_wire()
1810 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1811 * flags and back. We may replicate to other kernel versions. */
1812static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001813{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001814 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1815 (dpf & DP_FUA ? REQ_FUA : 0) |
1816 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1817 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001818}
1819
/* Negatively complete all postponed local writes overlapping
 * [sector, sector + size).
 * Called with req_lock held.  Completing a master bio must not happen
 * under the lock, so it is dropped and re-taken around
 * complete_master_bio(); since the tree may change while unlocked, the
 * overlap scan restarts from scratch each time (goto repeat). */
static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&mdev->tconn->req_lock);
		if (m.bio)
			complete_master_bio(mdev, &m);
		spin_lock_irq(&mdev->tconn->req_lock);
		goto repeat;
	}
}
1844
/* Resolve conflicts between an incoming peer write and overlapping local
 * write requests.
 * Called with req_lock held (see receive_Data).  Inserts peer_req->i into
 * the write_requests tree, then examines every overlapping interval.
 * Returns 0 if the peer request should be submitted, -ENOENT if it was
 * discarded/deferred (an ack work item is already queued), or another
 * negative error; on error the interval is removed again. */
static int handle_write_conflicts(struct drbd_conf *mdev,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_tconn *tconn = mdev->tconn;
	bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&mdev->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(mdev, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be discarded; otherwise,
			 * it will be retried once all overlapping requests
			 * have completed.
			 */
			bool discard = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  discard ? "local" : "remote");

			/* dec_unacked() happens in e_send_ack() */
			inc_unacked(mdev);
			peer_req->w.cb = discard ? e_send_discard_write :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &mdev->done_ee);
			wake_asender(mdev->tconn);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				dev_alert(DEV, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request will be discarded or
				 * retried. Requests that are discarded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(mdev, &req->i);
				if (err) {
					_conn_request_state(mdev->tconn,
							    NS(conn, C_TIMEOUT),
							    CS_HARD);
					fail_postponed_requests(mdev, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

 out:
	if (err)
		drbd_remove_epoch_entry_interval(mdev, peer_req);
	return err;
}
1952
/* mirrored write */
/* Receive a peer write (P_DATA), resolve conflicts, and submit it to the
 * local disk.  Returns false on fatal error (connection teardown). */
static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
			unsigned int data_size)
{
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = &mdev->tconn->data.rbuf.data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err;


	if (!get_ldev(mdev)) {
		/* no local disk: still keep the sequence numbers and the
		 * epoch size consistent, drain the payload, and NACK */
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
		atomic_inc(&mdev->current_epoch->epoch_size);
		return drbd_drain_block(mdev, data_size) && err == 0;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(mdev, p->block_id, sector, data_size);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	peer_req->w.cb = e_end_block;

	/* translate the packet's DP_* flags into bio REQ_* flags */
	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(mdev, dp_flags);

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* attach the request to the current write epoch */
	spin_lock(&mdev->epoch_lock);
	peer_req->epoch = mdev->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&mdev->epoch_lock);

	if (mdev->tconn->net_conf->two_primaries) {
		err = wait_for_and_update_peer_seq(mdev, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&mdev->tconn->req_lock);
		err = handle_write_conflicts(mdev, peer_req);
		if (err) {
			spin_unlock_irq(&mdev->tconn->req_lock);
			if (err == -ENOENT) {
				/* request was discarded/deferred; the queued
				 * ack work item owns it now */
				put_ldev(mdev);
				return true;
			}
			goto out_interrupted;
		}
	} else
		spin_lock_irq(&mdev->tconn->req_lock);
	list_add(&peer_req->w.list, &mdev->active_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	switch (mdev->tconn->net_conf->wire_protocol) {
	case DRBD_PROT_C:
		inc_unacked(mdev);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
		break;
	case DRBD_PROT_B:
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(mdev, P_RECV_ACK, peer_req);
		break;
	case DRBD_PROT_A:
		/* nothing to do */
		break;
	}

	if (mdev->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(mdev, peer_req->i.sector);
	}

	if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(mdev, peer_req);
	spin_unlock_irq(&mdev->tconn->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(mdev, peer_req->i.sector);

out_interrupted:
	drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2061
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */
int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
{
	struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;

	/* feature disabled? */
	if (mdev->sync_conf.c_min_rate == 0)
		return 0;

	/* never throttle an extent that application I/O already waits for */
	spin_lock_irq(&mdev->al_lock);
	tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&mdev->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&mdev->al_lock);

	/* backing-device sector activity minus our own resync activity
	 * (rs_sect_ev): what remains is application I/O */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&mdev->rs_sect_ev);

	if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		mdev->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
			rs_left = mdev->ov_left;
		else
			rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;

		dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = mdev->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* throttle only while we are faster than the configured
		 * minimum sync rate */
		if (dbdt > mdev->sync_conf.c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2127
2128
/*
 * receive_DataRequest() - answer a read request from the peer
 *
 * Handles P_DATA_REQUEST (application read from the peer),
 * P_RS_DATA_REQUEST (resync read), P_CSUM_RS_REQUEST (checksum based
 * resync), P_OV_REQUEST and P_OV_REPLY (online verify).
 *
 * @cmd:         the packet type that was received
 * @digest_size: bytes of payload still on the wire (the digest that
 *               accompanies P_CSUM_RS_REQUEST/P_OV_REPLY; presumably 0
 *               for the plain request types -- NOTE(review): confirm at
 *               the caller)
 *
 * Returns true if the request was queued (or its payload drained)
 * successfully, false to trigger a re-connect.
 */
static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int digest_size)
{
	sector_t sector;
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* sanity check: positive size, 512-byte aligned, bounded */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}
	/* must not read past the end of our backing device */
	if (sector + (size>>9) > capacity) {
		dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return false;
	}

	/* Without an up-to-date local disk we cannot serve the read: send
	 * the matching negative ack, then drain any trailing payload so
	 * the data stream stays in sync. */
	if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
		verb = 1;
		switch (cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(mdev);
			drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
				cmdname(cmd));
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			dev_err(DEV, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possible payload (e.g. the digest) */
		return drbd_drain_block(mdev, digest_size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(mdev);
		return false;
	}

	/* pick completion callback and fault-injection type per packet */
	switch (cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* digest_info header and the digest share one allocation */
		di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = digest_size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		/* read the peer's digest from the data socket */
		if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
			goto out_free_e;

		if (cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &mdev->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(mdev);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first verify request from a modern peer: initialize the
		 * online-verify bookkeeping and the progress marks */
		if (mdev->ov_start_sector == ~(sector_t)0 &&
		    mdev->tconn->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			mdev->ov_start_sector = sector;
			mdev->ov_position = sector;
			mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
			mdev->rs_total = mdev->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				mdev->rs_mark_left[i] = mdev->ov_left;
				mdev->rs_mark_time[i] = now;
			}
			dev_info(DEV, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
		    cmdname(cmd));
		fault_type = DRBD_FAULT_MAX;
		goto out_free_e;
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(mdev, sector))
		goto out_free_e;

submit_for_resync:
	/* account the read as resync activity, see drbd_rs_should_slow_down() */
	atomic_add(size >> 9, &mdev->rs_sect_ev);

submit:
	inc_unacked(mdev);
	spin_lock_irq(&mdev->tconn->req_lock);
	list_add_tail(&peer_req->w.list, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
		return true;

	/* don't care for the reason here */
	dev_err(DEV, "submit failed, triggering re-connect\n");
	spin_lock_irq(&mdev->tconn->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&mdev->tconn->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(mdev);
	drbd_free_ee(mdev, peer_req);
	return false;
}
2315
/*
 * drbd_asb_recover_0p() - after-split-brain auto recovery, zero primaries
 *
 * Decides the sync direction from the configured after-sb-0pri policy.
 * Returns -1 (discard local data, become sync target), 1 (peer discards,
 * become sync source), or -100 when no automatic decision was reached.
 *
 * Note the deliberate case fall-throughs: discard-younger/older-primary
 * fall back to discard-zero-changes and then discard-least-changes when
 * they cannot decide on their own.
 */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	/* lowest bitmap-uuid bit: was this side primary at split-brain time? */
	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	/* blocks changed on each side since the split */
	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* these policies require at least one primary */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: break it via the DISCARD_CONCURRENT flag */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* fall through to least-changes only for the younger/older
		 * policies that landed here */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2387
2388static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2389{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002390 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002391
Philipp Reisner89e58e72011-01-19 13:12:45 +01002392 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002393 case ASB_DISCARD_YOUNGER_PRI:
2394 case ASB_DISCARD_OLDER_PRI:
2395 case ASB_DISCARD_LEAST_CHG:
2396 case ASB_DISCARD_LOCAL:
2397 case ASB_DISCARD_REMOTE:
2398 dev_err(DEV, "Configuration error.\n");
2399 break;
2400 case ASB_DISCONNECT:
2401 break;
2402 case ASB_CONSENSUS:
2403 hg = drbd_asb_recover_0p(mdev);
2404 if (hg == -1 && mdev->state.role == R_SECONDARY)
2405 rv = hg;
2406 if (hg == 1 && mdev->state.role == R_PRIMARY)
2407 rv = hg;
2408 break;
2409 case ASB_VIOLENTLY:
2410 rv = drbd_asb_recover_0p(mdev);
2411 break;
2412 case ASB_DISCARD_SECONDARY:
2413 return mdev->state.role == R_PRIMARY ? 1 : -1;
2414 case ASB_CALL_HELPER:
2415 hg = drbd_asb_recover_0p(mdev);
2416 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002417 enum drbd_state_rv rv2;
2418
2419 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002420 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2421 * we might be here in C_WF_REPORT_PARAMS which is transient.
2422 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002423 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2424 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002425 drbd_khelper(mdev, "pri-lost-after-sb");
2426 } else {
2427 dev_warn(DEV, "Successfully gave up primary role.\n");
2428 rv = hg;
2429 }
2430 } else
2431 rv = hg;
2432 }
2433
2434 return rv;
2435}
2436
2437static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2438{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002439 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002440
Philipp Reisner89e58e72011-01-19 13:12:45 +01002441 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002442 case ASB_DISCARD_YOUNGER_PRI:
2443 case ASB_DISCARD_OLDER_PRI:
2444 case ASB_DISCARD_LEAST_CHG:
2445 case ASB_DISCARD_LOCAL:
2446 case ASB_DISCARD_REMOTE:
2447 case ASB_CONSENSUS:
2448 case ASB_DISCARD_SECONDARY:
2449 dev_err(DEV, "Configuration error.\n");
2450 break;
2451 case ASB_VIOLENTLY:
2452 rv = drbd_asb_recover_0p(mdev);
2453 break;
2454 case ASB_DISCONNECT:
2455 break;
2456 case ASB_CALL_HELPER:
2457 hg = drbd_asb_recover_0p(mdev);
2458 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002459 enum drbd_state_rv rv2;
2460
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2462 * we might be here in C_WF_REPORT_PARAMS which is transient.
2463 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002464 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2465 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002466 drbd_khelper(mdev, "pri-lost-after-sb");
2467 } else {
2468 dev_warn(DEV, "Successfully gave up primary role.\n");
2469 rv = hg;
2470 }
2471 } else
2472 rv = hg;
2473 }
2474
2475 return rv;
2476}
2477
2478static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2479 u64 bits, u64 flags)
2480{
2481 if (!uuid) {
2482 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2483 return;
2484 }
2485 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2486 text,
2487 (unsigned long long)uuid[UI_CURRENT],
2488 (unsigned long long)uuid[UI_BITMAP],
2489 (unsigned long long)uuid[UI_HISTORY_START],
2490 (unsigned long long)uuid[UI_HISTORY_END],
2491 (unsigned long long)bits,
2492 (unsigned long long)flags);
2493}
2494
/*
 * drbd_uuid_compare() - compare our on-disk UUID set with the peer's
 *
 * Return value encodes the required sync action:
 *   100	after split brain try auto recover
 *     2	C_SYNC_SOURCE set BitMap
 *     1	C_SYNC_SOURCE use BitMap
 *     0	no Sync
 *    -1	C_SYNC_TARGET use BitMap
 *    -2	C_SYNC_TARGET set BitMap
 *  -100	after split brain, disconnect
 * -1000	unrelated data
 * -1091	requires proto 91
 * -1096	requires proto 96
 *
 * *rule_nr reports which rule produced the decision (for logging).
 * May fix up our own or the peer's (in-memory) UUID sets when it detects
 * a missed "resync finished" event or a lost P_SYNC_UUID packet.
 */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* compare with the lowest bit (primary-at-crash flag) masked off */
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	/* rule 10: both sides freshly created -> nothing to sync */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* rule 20: only we are fresh/empty -> full sync from the peer */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* rule 30: only the peer is fresh/empty -> full sync to the peer */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		/* Current UUIDs match: we agreed on the data at some point.
		 * Distinguish "one side missed the resync-finished event"
		 * from a common power failure. */
		int rct, dc; /* roles at crash time */

		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
			/* peer already cleared its bitmap uuid, we did not */
			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
			/* mirror image of the case above: rotate the peer's
			 * in-memory uuids instead */
			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* both were primary: tie-break via DISCARD_CONCURRENT */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	/* rule 50: peer has a bitmap against our current uuid */
	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* rule 51: our current uuid is in the peer's history */
	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* rule 60: our current uuid appears further back in peer's history */
	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* rule 70: we have a bitmap against the peer's current uuid */
	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* rule 71: peer's current uuid is in our history (mirror of 51) */
	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	/* rule 80: peer's current uuid appears in our history */
	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* rule 90: both hold the same non-zero bitmap uuid -> split brain */
	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* rule 100: any common uuid in the two histories -> split brain */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	return -1000;
}
2686
/* drbd_sync_handshake() - decide the connection state after a handshake.
 *
 * Compares UUID sets (drbd_uuid_compare()), applies disk-state overrides,
 * runs the after-split-brain recovery policies if needed, and returns the
 * new conn state on success (C_WF_BITMAP_S / C_WF_BITMAP_T / C_CONNECTED),
 * or C_MASK (-1) on failure (unrelated data, unresolved split brain,
 * protocol too old, dry run, ...).
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	int hg, rule_nr;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;

	mydisk = mdev->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = mdev->new_state_tmp.disk;

	dev_info(DEV, "drbd_sync_handshake:\n");
	drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
	drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
		       mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

	/* hg > 0: we become sync source; hg < 0: sync target;
	 * |hg| == 2: full sync; |hg| == 100: split brain (see legend above
	 * drbd_uuid_compare()) */
	hg = drbd_uuid_compare(mdev, &rule_nr);

	dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		dev_alert(DEV, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* -1091/-1096: decision would need a newer protocol version */
	if (hg < -1000) {
		dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* an inconsistent disk always syncs from the consistent side,
	 * regardless of what the UUIDs said */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;	/* keep the "full sync" property */
		dev_info(DEV, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(mdev, "initial-split-brain");

	/* try the configured after-sb-Npri auto-recovery policies */
	if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
		int pcount = (mdev->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(mdev);
			break;
		case 1:
			hg = drbd_asb_recover_1p(mdev);
			break;
		case 2:
			hg = drbd_asb_recover_2p(mdev);
			break;
		}
		if (abs(hg) < 100) {
			dev_warn(DEV, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				dev_warn(DEV, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* still unresolved: maybe one side volunteered as victim
	 * (discard-my-data, reflected in UI_FLAGS bit 0 of the peer) */
	if (hg == -100) {
		if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			dev_warn(DEV, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(mdev, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* a primary must not silently become sync target; honor rr-conflict */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
		switch (mdev->tconn->net_conf->rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(mdev, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* dry-run: report what would happen, then bail out */
	if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
		if (hg == 0)
			dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| >= 2 means full sync: mark all blocks out of sync first */
	if (abs(hg) >= 2) {
		dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(mdev)) {
			dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
			     drbd_bm_total_weight(mdev));
		}
	}

	return rv;
}
2833
2834/* returns 1 if invalid */
2835static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2836{
2837 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2838 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2839 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2840 return 0;
2841
2842 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2843 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2844 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2845 return 1;
2846
2847 /* everything else is valid if they are equal on both sides. */
2848 if (peer == self)
2849 return 0;
2850
2851 /* everything es is invalid. */
2852 return 1;
2853}
2854
/* Handle a P_PROTOCOL packet: compare the peer's connection-level settings
 * (wire protocol, after-split-brain policies, two-primaries, want-lose,
 * data-integrity algorithm) against our local net_conf.
 * Returns true if everything matches; on a short receive returns false;
 * on any incompatibility forces C_DISCONNECTING and returns false. */
static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
			    unsigned int data_size)
{
	struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
	int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_want_lose, p_two_primaries, cf;
	char p_integrity_alg[SHARED_SECRET_MAX] = "";

	/* decode the peer's settings from network byte order */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_want_lose = cf & CF_WANT_LOSE;

	/* mirror the peer's dry-run request into our local flags */
	clear_bit(CONN_DRY_RUN, &mdev->flags);

	if (cf & CF_DRY_RUN)
		set_bit(CONN_DRY_RUN, &mdev->flags);

	if (p_proto != mdev->tconn->net_conf->wire_protocol) {
		dev_err(DEV, "incompatible communication protocols\n");
		goto disconnect;
	}

	/* after-split-brain policies must form a valid combination
	 * for each of the 0/1/2-primaries cases */
	if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
		dev_err(DEV, "incompatible after-sb-0pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
		dev_err(DEV, "incompatible after-sb-1pri settings\n");
		goto disconnect;
	}

	if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
		dev_err(DEV, "incompatible after-sb-2pri settings\n");
		goto disconnect;
	}

	/* at most one side may volunteer to discard its data */
	if (p_want_lose && mdev->tconn->net_conf->want_lose) {
		dev_err(DEV, "both sides have the 'want_lose' flag set\n");
		goto disconnect;
	}

	if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
		dev_err(DEV, "incompatible setting of the two-primaries options\n");
		goto disconnect;
	}

	/* protocol >= 87 appends the integrity algorithm name as payload;
	 * both sides must have configured the same algorithm */
	if (mdev->tconn->agreed_pro_version >= 87) {
		unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;

		if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
			return false;

		/* defensive NUL termination in case the peer sent garbage */
		p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
		if (strcmp(p_integrity_alg, my_alg)) {
			dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
			goto disconnect;
		}
		dev_info(DEV, "data-integrity-alg: %s\n",
		     my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
	}

	return true;

disconnect:
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
2927
2928/* helper function
2929 * input: alg name, feature name
2930 * return: NULL (alg name was "")
2931 * ERR_PTR(error) if something goes wrong
2932 * or the crypto hash ptr, if it worked out ok. */
2933struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2934 const char *alg, const char *name)
2935{
2936 struct crypto_hash *tfm;
2937
2938 if (!alg[0])
2939 return NULL;
2940
2941 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2942 if (IS_ERR(tfm)) {
2943 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2944 alg, name, PTR_ERR(tfm));
2945 return tfm;
2946 }
2947 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2948 crypto_free_hash(tfm);
2949 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2950 return ERR_PTR(-EINVAL);
2951 }
2952 return tfm;
2953}
2954
/* Handle a P_SYNC_PARAM / P_SYNC_PARAM89 packet: parse the version-dependent
 * resync parameters (rate, verify/csums algorithms, dynamic-rate planning)
 * and install them under peer_seq_lock.
 * The packet layout depends on the agreed protocol version (apv):
 *   <= 87: p_rs_param (rate only)
 *   == 88: p_rs_param + trailing verify-alg string of data_size bytes
 *   <= 94: p_rs_param_89 (fixed-size alg name fields)
 *   >= 95: p_rs_param_95 (adds c_plan_ahead etc. for dynamic resync rate)
 * Returns true on success, false on receive/parse error or after forcing
 * C_DISCONNECTING. */
static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int packet_size)
{
	int ok = true;
	struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	const int apv = mdev->tconn->agreed_pro_version;
	int *rs_plan_s = NULL;
	int fifo_size = 0;

	/* maximum acceptable packet size for this protocol version */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (packet_size > exp_max_sz) {
		dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    packet_size, exp_max_sz);
		return false;
	}

	/* split the remaining bytes into fixed header and (apv==88 only)
	 * a variable-length trailing algorithm-name payload */
	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
		data_size   = packet_size  - header_size;
		D_ASSERT(data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
		return false;

	mdev->sync_conf.rate	  = be32_to_cpu(p->rate);

	if (apv >= 88) {
		if (apv == 88) {
			/* apv 88 sends the verify-alg name as a separate
			 * variable-length payload after the header */
			if (data_size > SHARED_SECRET_MAX) {
				dev_err(DEV, "verify-alg too long, "
				    "peer wants %u, accepting only %u byte\n",
						data_size, SHARED_SECRET_MAX);
				return false;
			}

			if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
				return false;

			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* changing the verify algorithm is only allowed while the
		 * connection is still being established */
		if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		/* same rule for the checksum-based-resync algorithm (apv >= 89) */
		if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
			if (mdev->state.conn == C_WF_REPORT_PARAMS) {
				dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    mdev->sync_conf.csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		/* apv >= 95 adds the dynamic resync-rate controller settings;
		 * pre-allocate the plan fifo outside the spinlock below */
		if (apv > 94) {
			mdev->sync_conf.rate	  = be32_to_cpu(p->rate);
			mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
			mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
			mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
				rs_plan_s   = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
				if (!rs_plan_s) {
					dev_err(DEV, "kmalloc of fifo_buffer failed");
					goto disconnect;
				}
			}
		}

		spin_lock(&mdev->peer_seq_lock);
		/* lock against drbd_nl_syncer_conf() */
		if (verify_tfm) {
			strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
			mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
			crypto_free_hash(mdev->verify_tfm);
			mdev->verify_tfm = verify_tfm;
			dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
		}
		if (csums_tfm) {
			strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
			mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
			crypto_free_hash(mdev->csums_tfm);
			mdev->csums_tfm = csums_tfm;
			dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
		}
		/* swap in the new (possibly NULL/empty) resync plan fifo */
		if (fifo_size != mdev->rs_plan_s.size) {
			kfree(mdev->rs_plan_s.values);
			mdev->rs_plan_s.values = rs_plan_s;
			mdev->rs_plan_s.size   = fifo_size;
			mdev->rs_planed = 0;
		}
		spin_unlock(&mdev->peer_seq_lock);
	}

	return ok;
disconnect:
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
	return false;
}
3106
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107/* warn if the arguments differ by more than 12.5% */
3108static void warn_if_differ_considerably(struct drbd_conf *mdev,
3109 const char *s, sector_t a, sector_t b)
3110{
3111 sector_t d;
3112 if (a == 0 || b == 0)
3113 return;
3114 d = (a > b) ? (a - b) : (b - a);
3115 if (d > (a>>3) || d > (b>>3))
3116 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3117 (unsigned long long)a, (unsigned long long)b);
3118}
3119
/* Handle a P_SIZES packet: reconcile our device size with the peer's
 * backing-device and user-requested sizes, possibly resize, and trigger
 * a resync after an online grow.
 * Returns true on success; false on a fatal size conflict (connection is
 * forced into C_DISCONNECTING) or a resize error. */
static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
	enum determine_dev_size dd = unchanged;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	p_size = be64_to_cpu(p->d_size);	/* peer's backing device size */
	p_usize = be64_to_cpu(p->u_size);	/* peer's user-requested size */

	/* at least one side needs a backing device */
	if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
		dev_err(DEV, "some backing storage is needed\n");
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	mdev->p_size = p_size;

	if (get_ldev(mdev)) {
		warn_if_differ_considerably(mdev, "lower level device sizes",
			   p_size, drbd_get_max_capacity(mdev->ldev));
		warn_if_differ_considerably(mdev, "user requested size",
					    p_usize, mdev->ldev->dc.disk_size);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (mdev->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
					     p_usize);

		/* remember the old value, so we can restore it if the
		 * peer's size turns out to be unacceptable below */
		my_usize = mdev->ldev->dc.disk_size;

		if (mdev->ldev->dc.disk_size != p_usize) {
			mdev->ldev->dc.disk_size = p_usize;
			dev_info(DEV, "Peer sets u_size to %lu sectors\n",
				 (unsigned long)mdev->ldev->dc.disk_size);
		}

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
		   drbd_get_capacity(mdev->this_bdev) &&
		   mdev->state.disk >= D_OUTDATED &&
		   mdev->state.conn < C_CONNECTED) {
			dev_err(DEV, "The peer's disk size is too small!\n");
			drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
			mdev->ldev->dc.disk_size = my_usize;
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(mdev)) {
		dd = drbd_determine_dev_size(mdev, ddsf);
		put_ldev(mdev);
		if (dd == dev_size_error)
			return false;
		drbd_md_sync(mdev);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(mdev, p_size);
	}

	/* the peer's request size limit may have changed; recompute ours */
	mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(mdev);

	if (get_ldev(mdev)) {
		if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
			mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(mdev);
	}

	if (mdev->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(mdev->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(mdev, 0, ddsf);
		}
		/* a pending resize, or a grow while connected, requires a
		 * resync of the new area (unless suppressed by the admin) */
		if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
		    (dd == grew && mdev->state.conn == C_CONNECTED)) {
			if (mdev->state.pdsk >= D_INCONSISTENT &&
			    mdev->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(mdev);
			} else
				set_bit(RESYNC_AFTER_NEG, &mdev->flags);
		}
	}

	return true;
}
3223
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003224static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3225 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003227 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003229 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230
Philipp Reisnerb411b362009-09-25 16:07:19 -07003231 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3232
3233 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3234 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3235
3236 kfree(mdev->p_uuid);
3237 mdev->p_uuid = p_uuid;
3238
3239 if (mdev->state.conn < C_CONNECTED &&
3240 mdev->state.disk < D_INCONSISTENT &&
3241 mdev->state.role == R_PRIMARY &&
3242 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3243 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3244 (unsigned long long)mdev->ed_uuid);
3245 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003246 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003247 }
3248
3249 if (get_ldev(mdev)) {
3250 int skip_initial_sync =
3251 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003252 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3254 (p_uuid[UI_FLAGS] & 8);
3255 if (skip_initial_sync) {
3256 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3257 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003258 "clear_n_write from receive_uuids",
3259 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3261 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3262 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3263 CS_VERBOSE, NULL);
3264 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003265 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003266 }
3267 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003268 } else if (mdev->state.disk < D_INCONSISTENT &&
3269 mdev->state.role == R_PRIMARY) {
3270 /* I am a diskless primary, the peer just created a new current UUID
3271 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003272 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003273 }
3274
3275 /* Before we test for the disk state, we should wait until an eventually
3276 ongoing cluster wide state change is finished. That is important if
3277 we are primary and are detaching from our disk. We need to see the
3278 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003279 mutex_lock(mdev->state_mutex);
3280 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003282 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3283
3284 if (updated_uuids)
3285 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003286
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003287 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288}
3289
3290/**
3291 * convert_state() - Converts the peer's view of the cluster state to our point of view
3292 * @ps: The state as seen by the peer.
3293 */
3294static union drbd_state convert_state(union drbd_state ps)
3295{
3296 union drbd_state ms;
3297
3298 static enum drbd_conns c_tab[] = {
3299 [C_CONNECTED] = C_CONNECTED,
3300
3301 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3302 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3303 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3304 [C_VERIFY_S] = C_VERIFY_T,
3305 [C_MASK] = C_MASK,
3306 };
3307
3308 ms.i = ps.i;
3309
3310 ms.conn = c_tab[ps.conn];
3311 ms.peer = ps.role;
3312 ms.role = ps.peer;
3313 ms.pdsk = ps.disk;
3314 ms.disk = ps.pdsk;
3315 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3316
3317 return ms;
3318}
3319
/* Handle a P_STATE_CHG_REQ / P_CONN_ST_CHG_REQ packet: apply the state
 * change the peer requests (after converting it to our point of view)
 * and send back the result.
 * Always returns true; a rejected change is reported to the peer via the
 * reply, not via the return value. */
static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* if both nodes request a change concurrently, the one holding the
	 * DISCARD_CONCURRENT token wins; tell the peer to back off */
	if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
	    mutex_is_locked(mdev->state_mutex)) {
		drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
		return true;
	}

	/* the request is phrased from the peer's perspective; mirror it */
	mask = convert_state(mask);
	val = convert_state(val);

	if (cmd == P_CONN_ST_CHG_REQ) {
		/* connection-wide change: apply locally only, peer does its own */
		rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
		conn_send_sr_reply(mdev->tconn, rv);
	} else {
		rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
		drbd_send_sr_reply(mdev, rv);
	}

	drbd_md_sync(mdev);

	return true;
}
3351
/* Handle a P_STATE packet: merge the peer's reported state into ours,
 * possibly starting the resync handshake, and resolve the various
 * flapping/races between the two nodes' views.
 * The state is sampled and committed under tconn->req_lock; if it changed
 * in between (e.g. by a concurrent worker), the whole evaluation is
 * retried.
 * Returns true on success, false when the connection has to be dropped. */
static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
			 unsigned int data_size)
{
	struct p_state *p = &mdev->tconn->data.rbuf.state;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_state.i = be32_to_cpu(p->state);

	/* while the peer is still attaching, derive its effective disk state
	 * from the consistency flag in the previously received UUIDs */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&mdev->tconn->req_lock);
 retry:
	/* snapshot our state; re-verified before committing below */
	os = ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* peer says his disk is uptodate, while we think it is inconsistent,
	 * and this happens while we think we have a sync going on. */
	if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
				drbd_resync_finished(mdev);
			return true;
		}
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* if the peer went Ahead (congestion mode), we are Behind */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);

		put_ldev(mdev);
		/* C_MASK from the handshake means "could not agree" */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (mdev->state.disk == D_NEGOTIATING) {
				drbd_force_state(mdev, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
					return false;
				D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
				drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
				return false;
			}
		}
	}

	spin_lock_irq(&mdev->tconn->req_lock);
	/* somebody changed our state while we were computing; start over */
	if (mdev->state.i != os.i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &mdev->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = mdev->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(mdev->tconn);
		drbd_uuid_new_current(mdev);
		clear_bit(NEW_CUR_UUID, &mdev->flags);
		drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
		return false;
	}
	rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	ns = mdev->state;
	spin_unlock_irq(&mdev->tconn->req_lock);

	if (rv < SS_SUCCESS) {
		drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
		return false;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(mdev);
			drbd_send_state(mdev);
		}
	}

	/* want_lose is a one-shot flag; consumed by a successful connect */
	mdev->tconn->net_conf->want_lose = 0;

	drbd_md_sync(mdev); /* update connected indicator, la_size, ... */

	return true;
}
3504
/* Handle a P_SYNC_UUID packet: adopt the peer's sync UUID and start
 * resync as SyncTarget.
 *
 * Waits until our connection/disk state settles into something where
 * acting on the packet is meaningful (or no longer possible), then
 * writes the received UUID as the current UUID and clears the bitmap
 * UUID before kicking off the resync.
 *
 * Returns true on (possibly ignored) success; the packet is ignored if
 * the local disk is no longer attached at D_NEGOTIATING or better.
 */
static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
			     unsigned int data_size)
{
	struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;

	/* Wait until we are in a state where the SyncUUID may be applied,
	 * or until the connection/disk degraded so far that we give up. */
	wait_event(mdev->misc_wait,
		   mdev->state.conn == C_WF_SYNC_UUID ||
		   mdev->state.conn == C_BEHIND ||
		   mdev->state.conn < C_CONNECTED ||
		   mdev->state.disk < D_NEGOTIATING);

	/* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
		/* Take over the peer's current UUID and invalidate our
		 * bitmap UUID, then resync towards the peer's data. */
		_drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(mdev, UI_BITMAP, 0UL);

		drbd_print_uuids(mdev, "updated sync uuid");
		drbd_start_resync(mdev, C_SYNC_TARGET);

		put_ldev(mdev);
	} else
		dev_err(DEV, "Ignoring SyncUUID packet!\n");

	return true;
}
3533
/**
 * receive_bitmap_plain - receive one uncompressed bitmap packet payload
 * @mdev:	DRBD device
 * @data_size:	payload size announced in the packet header
 * @buffer:	one-page scratch buffer the words are received into
 * @c:		bitmap transfer context (word/bit offsets are advanced)
 *
 * Receives the next chunk of plain (uncompressed) bitmap words from the
 * peer and merges it into the local bitmap at the current word offset.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
		     unsigned long *buffer, struct bm_xfer_ctx *c)
{
	/* at most one packet's worth of words, capped by what remains */
	unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
	unsigned want = num_words * sizeof(long);
	int err;

	/* the sender must have announced exactly the expected payload */
	if (want != data_size) {
		dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
		return -EIO;
	}
	if (want == 0)
		return 0;	/* transfer complete */
	err = drbd_recv(mdev->tconn, buffer, want);
	if (err != want) {
		/* short read without error code: treat as I/O error */
		if (err >= 0)
			err = -EIO;
		return err;
	}

	drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);

	/* advance the transfer context; clamp bit offset to the bitmap end */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
3570
/**
 * recv_bm_rle_bits - decode one VLI-RLE compressed bitmap packet
 * @mdev:	DRBD device
 * @p:		received compressed bitmap packet
 * @c:		bitmap transfer context (bit/word offsets are advanced)
 * @len:	length in bytes of the RLE code stream in @p
 *
 * Decodes run-length-encoded, variable-length-integer coded runs of
 * set/cleared bits and applies the "set" runs to the local bitmap.
 * The first run's polarity comes from the packet (DCBP_get_start());
 * polarity toggles after every run.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_conf *mdev,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;		/* decode window of up to 64 bits */
	u64 rl;			/* current run length */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of current run */
	unsigned long e;			/* end bit of current run */
	int toggle = DCBP_get_start(p);		/* polarity of the first run */
	int have;		/* valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			/* a run of set bits: apply it, bounds-checked */
			e = s + rl -1;
			if (e >= c->bm_bits) {
				dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(mdev, s, e);
		}

		/* the decoded code must fit in what we had buffered */
		if (have < bits) {
			dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* consume the decoded code and refill the window */
		look_ahead >>= bits;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* another packet is needed unless we decoded up to the last bit */
	return (s != c->bm_bits);
}
3635
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003636/**
3637 * decode_bitmap_c
3638 *
3639 * Return 0 when done, 1 when another iteration is needed, and a negative error
3640 * code upon failure.
3641 */
3642static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003643decode_bitmap_c(struct drbd_conf *mdev,
3644 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003645 struct bm_xfer_ctx *c,
3646 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647{
3648 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003649 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003650
3651 /* other variants had been implemented for evaluation,
3652 * but have been dropped as this one turned out to be "best"
3653 * during all our tests. */
3654
3655 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3656 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003657 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003658}
3659
3660void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3661 const char *direction, struct bm_xfer_ctx *c)
3662{
3663 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003664 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003665 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3666 + c->bm_words * sizeof(long);
3667 unsigned total = c->bytes[0] + c->bytes[1];
3668 unsigned r;
3669
3670 /* total can not be zero. but just in case: */
3671 if (total == 0)
3672 return;
3673
3674 /* don't report if not compressed */
3675 if (total >= plain)
3676 return;
3677
3678 /* total < plain. check for overflow, still */
3679 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3680 : (1000 * total / plain);
3681
3682 if (r > 1000)
3683 r = 1000;
3684
3685 r = 1000 - r;
3686 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3687 "total %u; compression: %u.%u%%\n",
3688 direction,
3689 c->bytes[1], c->packets[1],
3690 c->bytes[0], c->packets[0],
3691 total, r/10, r % 10);
3692}
3693
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter if the process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Receives the peer's whole bitmap, looping over as many P_BITMAP /
   P_COMPRESSED_BITMAP packets as needed, then triggers the follow-up
   state transition (or resync) depending on whether we are bitmap
   target or source.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
			  unsigned int data_size)
{
	struct bm_xfer_ctx c;
	void *buffer;
	int err;
	int ok = false;
	struct p_header *h = &mdev->tconn->data.rbuf.header;
	struct packet_info pi;

	drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* maybe we should use some per thread scratch page,
	 * and allocate that during initial device creation? */
	buffer	 = (unsigned long *) __get_free_page(GFP_NOIO);
	if (!buffer) {
		dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
		goto out;
	}

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(mdev),
		.bm_words = drbd_bm_words(mdev),
	};

	/* one iteration per received bitmap packet */
	for(;;) {
		if (cmd == P_BITMAP) {
			err = receive_bitmap_plain(mdev, data_size, buffer, &c);
		} else if (cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p;

			if (data_size > BM_PACKET_PAYLOAD_BYTES) {
				dev_err(DEV, "ReportCBitmap packet too large\n");
				goto out;
			}
			/* use the page buff */
			p = buffer;
			/* prepend the already-received header so the decode
			 * helpers see a complete packet in the buffer */
			memcpy(p, h, sizeof(*h));
			if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
				goto out;
			if (data_size <= (sizeof(*p) - sizeof(p->head))) {
				dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
				goto out;
			}
			err = decode_bitmap_c(mdev, p, &c, data_size);
		} else {
			dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
			goto out;
		}

		/* per-encoding transfer statistics (index 1 = plain) */
		c.packets[cmd == P_BITMAP]++;
		c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;

		/* err: 0 = done, 1 = more packets needed, <0 = failure */
		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		/* fetch the header of the next bitmap packet */
		if (!drbd_recv_header(mdev->tconn, &pi))
			goto out;
		cmd = pi.cmd;
		data_size = pi.size;
	}

	INFO_bm_xfer_stats(mdev, "receive", &c);

	if (mdev->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* we are bitmap target: reply with our bitmap, then wait
		 * for the sync UUID */
		ok = !drbd_send_bitmap(mdev);
		if (!ok)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(rv == SS_SUCCESS);
	} else if (mdev->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(mdev->state.conn));
	}

	ok = true;
 out:
	drbd_bm_unlock(mdev);
	/* bitmap source: both sides now agree, start the resync */
	if (ok && mdev->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(mdev, C_SYNC_SOURCE);
	free_page((unsigned long) buffer);
	return ok;
}
3796
Philipp Reisner2de876e2011-03-15 14:38:01 +01003797static int _tconn_receive_skip(struct drbd_tconn *tconn, unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798{
3799 /* TODO zero copy sink :) */
3800 static char sink[128];
3801 int size, want, r;
3802
Philipp Reisner02918be2010-08-20 14:35:10 +02003803 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804 while (size > 0) {
3805 want = min_t(int, size, sizeof(sink));
Philipp Reisner2de876e2011-03-15 14:38:01 +01003806 r = drbd_recv(tconn, sink, want);
3807 if (r <= 0)
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003808 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003809 size -= r;
3810 }
3811 return size == 0;
3812}
3813
Philipp Reisner2de876e2011-03-15 14:38:01 +01003814static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3815 unsigned int data_size)
3816{
3817 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3818 cmd, data_size);
3819
3820 return _tconn_receive_skip(mdev->tconn, data_size);
3821}
3822
3823static int tconn_receive_skip(struct drbd_tconn *tconn, enum drbd_packet cmd, unsigned int data_size)
3824{
3825 conn_warn(tconn, "skipping packet for non existing volume type %d, l: %d!\n",
3826 cmd, data_size);
3827
3828 return _tconn_receive_skip(tconn, data_size);
3829}
3830
/* Handle a P_UNPLUG_REMOTE packet.
 * The packet itself carries no payload; we only ack the TCP data so far. */
static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
				unsigned int data_size)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(mdev->tconn->data.socket);

	return true;
}
3840
/* Handle a P_OUT_OF_SYNC packet: the peer tells us a block range is out
 * of sync; mark it in our bitmap.  Only expected while we are waiting
 * for the sync UUID / bitmap, or running C_BEHIND; any other connection
 * state is logged as an assertion failure but the range is marked anyway. */
static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
			       unsigned int data_size)
{
	struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;

	switch (mdev->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		/* unexpected state: complain loudly, but do not drop the info */
		dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
				drbd_conn_str(mdev->state.conn));
	}

	drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return true;
}
3860
/* Signature shared by all per-device packet handlers. */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
				  unsigned int to_receive);

/* Dispatch-table entry: fixed packet size, whether a variable payload
 * beyond that size is legal, and the handler to call. */
struct data_cmd {
	int expect_payload;		/* may carry extra payload bytes */
	size_t pkt_size;		/* fixed (sub-)header size to receive */
	drbd_cmd_handler_f function;	/* handler for this packet type */
};

/* Dispatch table for the data socket, indexed by packet command code. */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, sizeof(struct p_header), receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, sizeof(struct p_header), receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
};

/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003902
/* Main receive loop of the receiver thread: read packet headers off the
 * data socket, validate them against the dispatch table, pull in the
 * fixed sub-header, then hand off to the per-packet handler (or drain
 * the packet if it names a volume we do not have).  Any protocol
 * violation or handler failure forces C_PROTOCOL_ERROR. */
static void drbdd(struct drbd_tconn *tconn)
{
	struct p_header *header = &tconn->data.rbuf.header;
	struct drbd_conf *mdev;
	struct packet_info pi;
	size_t shs; /* sub header size */
	int rv;

	while (get_t_state(&tconn->receiver) == RUNNING) {
		drbd_thread_current_set_cpu(&tconn->receiver);
		if (!drbd_recv_header(tconn, &pi))
			goto err_out;

		/* command must be in table range and have a handler */
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) ||
			     !drbd_cmd_handler[pi.cmd].function)) {
			conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
			goto err_out;
		}

		/* extra payload beyond the fixed size is only legal if the
		 * table says so */
		shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
		if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
			conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			/* receive the fixed sub-header into the shared buffer */
			rv = drbd_recv(tconn, &header->payload, shs);
			if (unlikely(rv != shs)) {
				if (!signal_pending(current))
					conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
				goto err_out;
			}
		}

		/* dispatch to the addressed volume, or drain the payload if
		 * that volume does not exist on this side */
		mdev = vnr_to_mdev(tconn, pi.vnr);
		rv = mdev ?
			drbd_cmd_handler[pi.cmd].function(mdev, pi.cmd, pi.size - shs) :
			tconn_receive_skip(tconn, pi.cmd, pi.size - shs);

		if (unlikely(!rv)) {
			conn_err(tconn, "error receiving %s, l: %d!\n",
			    cmdname(pi.cmd), pi.size);
			goto err_out;
		}
	}

	/* error exit: force connection state to protocol error */
	if (0) {
	err_out:
		conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	}
}
3954
Philipp Reisner0e29d162011-02-18 14:23:11 +01003955void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003956{
3957 struct drbd_wq_barrier barr;
3958
3959 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01003960 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003961 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01003962 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003963 wait_for_completion(&barr.done);
3964}
3965
/* Tear down an established connection: stop the asender, close the
 * sockets, run per-volume cleanup (drbd_disconnected) for every volume
 * on this connection, and move the connection state towards
 * C_UNCONNECTED (or all the way to C_STANDALONE when the admin asked
 * for C_DISCONNECTING). */
static void drbd_disconnect(struct drbd_tconn *tconn)
{
	enum drbd_conns oc;
	/* NOTE(review): rv is only consumed inside _conn_request_state's
	 * return; kept for symmetry with other state-change call sites */
	int rv = SS_UNKNOWN_ERROR;

	if (tconn->cstate == C_STANDALONE)
		return;		/* nothing to disconnect */

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&tconn->asender);
	drbd_free_sock(tconn);

	/* per-volume teardown for every device on this connection */
	idr_for_each(&tconn->volumes, drbd_disconnected, tconn);

	conn_info(tconn, "Connection closed\n");

	spin_lock_irq(&tconn->req_lock);
	oc = tconn->cstate;
	if (oc >= C_UNCONNECTED)
		rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&tconn->req_lock);

	if (oc == C_DISCONNECTING) {
		/* wait until nobody references the net configuration anymore,
		 * then free authentication state and the net_conf itself */
		wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);

		crypto_free_hash(tconn->cram_hmac_tfm);
		tconn->cram_hmac_tfm = NULL;

		kfree(tconn->net_conf);
		tconn->net_conf = NULL;
		conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
	}
}
4000
/* Per-volume cleanup after the connection is gone; called via
 * idr_for_each() from drbd_disconnect() for every volume.
 * @vnr: volume number (unused here), @p: the drbd_conf of the volume,
 * @data: the tconn (unused here).  Always returns 0 so iteration
 * continues over the remaining volumes. */
static int drbd_disconnected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	enum drbd_fencing_p fp;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
	_drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(mdev);
	mdev->rs_total = 0;
	mdev->rs_failed = 0;
	atomic_set(&mdev->rs_pending_cnt, 0);
	wake_up(&mdev->misc_wait);

	del_timer(&mdev->request_timer);

	/* stop the resync timer, then run its function once by hand to
	 * requeue any pending resync work as "canceled" */
	del_timer_sync(&mdev->resync_timer);
	resync_timer_fn((unsigned long)mdev);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(mdev);

	/* This also does reclaim_net_ee().  If we do this too early, we might
	 * miss some resync ee and pages.*/
	drbd_process_done_ee(mdev);

	/* peer identity is gone with the connection */
	kfree(mdev->p_uuid);
	mdev->p_uuid = NULL;

	/* if IO is suspended, the transfer log is kept for a later resend */
	if (!is_susp(mdev->state))
		tl_clear(mdev->tconn);

	drbd_md_sync(mdev);

	/* fencing policy lives in the disk config; only readable while
	 * we hold a local-disk reference */
	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
		drbd_try_outdate_peer_async(mdev);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_release_ee(mdev, &mdev->net_ee);
	if (i)
		dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&mdev->pp_in_use_by_net);
	if (i)
		dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&mdev->pp_in_use);
	if (i)
		dev_info(DEV, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(list_empty(&mdev->read_ee));
	D_ASSERT(list_empty(&mdev->active_ee));
	D_ASSERT(list_empty(&mdev->sync_ee));
	D_ASSERT(list_empty(&mdev->done_ee));

	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&mdev->current_epoch->epoch_size, 0);
	D_ASSERT(list_empty(&mdev->current_epoch->list));

	return 0;
}
4093
/*
 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
 * we can agree on is stored in agreed_pro_version.
 *
 * feature flags and the reserved array should be enough room for future
 * enhancements of the handshake protocol, and possible plugins...
 *
 * for now, they are expected to be zero, but ignored.
 */
static int drbd_send_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == mdev->tconn->receiver ... */
	struct p_handshake *p = &tconn->data.sbuf.handshake;
	int ok;

	/* serialize against other senders on the data socket; an
	 * interrupted lock attempt counts as handshake failure */
	if (mutex_lock_interruptible(&tconn->data.mutex)) {
		conn_err(tconn, "interrupted during initial handshake\n");
		return 0; /* interrupted. not ok. */
	}

	/* socket may have been torn down while we waited for the mutex */
	if (tconn->data.socket == NULL) {
		mutex_unlock(&tconn->data.mutex);
		return 0;
	}

	/* zero the whole packet: feature flags / reserved fields are
	 * expected to be zero by the peer */
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
			    &p->head, sizeof(*p), 0);
	mutex_unlock(&tconn->data.mutex);
	return ok;
}
4127
4128/*
4129 * return values:
4130 * 1 yes, we have a valid connection
4131 * 0 oops, did not work out, please try again
4132 * -1 peer talks different language,
4133 * no point in trying again, please go standalone.
4134 */
static int drbd_do_handshake(struct drbd_tconn *tconn)
{
	/* ASSERT current == tconn->receiver ... */
	struct p_handshake *p = &tconn->data.rbuf.handshake;
	/* payload length after the (already consumed) 8.0-style header */
	const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
	struct packet_info pi;
	int rv;

	/* send ours first ... */
	rv = drbd_send_handshake(tconn);
	if (!rv)
		return 0;

	/* ... then read the peer's packet header */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		return 0;

	if (pi.cmd != P_HAND_SHAKE) {
		conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		conn_err(tconn, "expected HandShake length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	/* receive the payload into the handshake receive buffer */
	rv = drbd_recv(tconn, &p->head.payload, expect);

	if (rv != expect) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
		return 0;
	}

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* presumably older peers only sent protocol_min; treat max==0 as
	 * "min is the only supported version" — TODO confirm */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* the ranges must overlap for the nodes to talk */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version both sides support */
	tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	conn_info(tconn, "Handshake successful: "
	     "Agreed network protocol version %d\n", tconn->agreed_pro_version);

	return 1;

 incompat:
	conn_err(tconn, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4194
4195#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: authentication can
 * never succeed, tell the admin to disable cram-hmac-alg.
 * Returns -1 (auth failed, don't retry). */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	/* This function only has a tconn, no mdev: the original dev_err(DEV, ...)
	 * could not compile in this configuration (DEV needs an mdev in scope).
	 * Use the connection-scoped log helpers like the rest of this file. */
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4202#else
4203#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004204
4205/* Return value:
4206 1 - auth succeeded,
4207 0 - failed, try again (network error),
4208 -1 - auth failed, don't try again.
4209*/
4210
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len = strlen(tconn->net_conf->shared_secret);
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	int rv;

	desc.tfm = tconn->cram_hmac_tfm;
	desc.flags = 0;

	/* key the HMAC transform with the configured shared secret */
	rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
				(u8 *)tconn->net_conf->shared_secret, key_len);
	if (rv) {
		conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* send our random challenge ... */
	get_random_bytes(my_challenge, CHALLENGE_LEN);

	rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* ... and expect the peer's challenge in return */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_CHALLENGE) {
		conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* bound the peer-supplied length before allocating */
	if (pi.size > CHALLENGE_LEN * 2) {
		conn_err(tconn, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		conn_err(tconn, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	rv = drbd_recv(tconn, peers_ch, pi.size);

	if (rv != pi.size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		conn_err(tconn, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	/* our response: HMAC over the peer's challenge */
	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
	if (!rv)
		goto fail;

	/* now receive the peer's response to our challenge */
	rv = drbd_recv_header(tconn, &pi);
	if (!rv)
		goto fail;

	if (pi.cmd != P_AUTH_RESPONSE) {
		conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		conn_err(tconn, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	rv = drbd_recv(tconn, response, resp_size);

	if (rv != resp_size) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
		rv = 0;
		goto fail;
	}

	/* compute what the peer's response should look like ... */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		conn_err(tconn, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* ... and compare */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
		     resp_size, tconn->net_conf->cram_hmac_alg);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditional cleanup is fine */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4353#endif
4354
/* Entry point of the receiver thread: (re)establish the connection,
 * then process incoming packets in drbdd() until disconnect.
 * Always returns 0 (thread exit status). */
int drbdd_init(struct drbd_thread *thi)
{
	struct drbd_tconn *tconn = thi->tconn;
	int h;

	conn_info(tconn, "receiver (re)started\n");

	do {
		h = drbd_connect(tconn);
		if (h == 0) {
			/* transient failure: tear down and retry after ~1s */
			drbd_disconnect(tconn);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			/* fatal (e.g. incompatible peer): go standalone */
			conn_warn(tconn, "Discarding network configuration.\n");
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0) {
		/* connection established: run the receive loop while we
		 * hold a reference on the net configuration */
		if (get_net_conf(tconn)) {
			drbdd(tconn);
			put_net_conf(tconn);
		}
	}

	drbd_disconnect(tconn);

	conn_info(tconn, "receiver terminated\n");
	return 0;
}
4386
4387/* ********* acknowledge sender ******** */
4388
/* Peer replied to a state change request: P_STATE_CHG_REPLY for a single
 * device, P_CONN_ST_CHG_REPLY for the whole connection.  Record success
 * or failure in the corresponding flags and wake the waiter. */
static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
	struct drbd_tconn *tconn = mdev->tconn;

	int retcode = be32_to_cpu(p->retcode);

	if (cmd == P_STATE_CHG_REPLY) {
		if (retcode >= SS_SUCCESS) {
			set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
		} else {
			set_bit(CL_ST_CHG_FAIL, &mdev->flags);
			dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
				drbd_set_st_err_str(retcode), retcode);
		}
		/* the requester sleeps on mdev->state_wait */
		wake_up(&mdev->state_wait);
	} else /* conn == P_CONN_ST_CHG_REPLY */ {
		if (retcode >= SS_SUCCESS) {
			set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
		} else {
			set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
			conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
				 drbd_set_st_err_str(retcode), retcode);
		}
		/* connection-wide state changes wait on tconn->ping_wait */
		wake_up(&tconn->ping_wait);
	}
	return true;
}
4417
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004418static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004420 return drbd_send_ping_ack(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421
4422}
4423
/* The peer answered our ping: restore the idle receive timeout and wake
 * anyone waiting for the ack. */
static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct drbd_tconn *tconn = mdev->tconn;
	/* restore idle timeout */
	tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
	/* only the first ack flips the bit and wakes the waiters */
	if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
		wake_up(&tconn->ping_wait);

	return true;
}
4434
/* P_RS_IS_IN_SYNC: the peer found the block identical (checksum-based
 * resync, protocol >= 89), so mark it in sync without transferring data. */
static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	D_ASSERT(mdev->tconn->agreed_pro_version >= 89);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (get_ldev(mdev)) {
		drbd_rs_complete_io(mdev, sector);
		drbd_set_in_sync(mdev, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(mdev);
	}
	dec_rs_pending(mdev);
	/* account the block as "resync traffic received" (in 512-byte sectors) */
	atomic_add(blksize >> 9, &mdev->rs_sect_in);

	return true;
}
4457
/* Look up the request identified by @id/@sector in @root (under req_lock)
 * and apply the state machine event @what to it.  With @missing_ok, a
 * request that is no longer in the tree is not logged as an error by
 * find_request; we still return false in that case.
 * Returns true when the request was found and the event applied. */
static int
validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&mdev->tconn->req_lock);
	req = find_request(mdev, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&mdev->tconn->req_lock);
		return false;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&mdev->tconn->req_lock);

	/* complete the master bio outside the spinlock */
	if (m.bio)
		complete_master_bio(mdev, &m);
	return true;
}
4479
/* Map the various write-acknowledgement packets to the corresponding
 * request state machine event and apply it to the matching request. */
static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	/* acks for resync requests carry ID_SYNCER instead of a request id */
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(mdev, sector, blksize);
		dec_rs_pending(mdev);
		return true;
	}
	switch (cmd) {
	case P_RS_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		/* P_RECV_ACK only exists in protocol B */
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
		what = RECV_ACKED_BY_PEER;
		break;
	case P_DISCARD_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = DISCARD_WRITE;
		break;
	case P_RETRY_WRITE:
		D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
		what = POSTPONE_WRITE;
		break;
	default:
		/* the asender dispatch table should make this unreachable */
		D_ASSERT(0);
		return false;
	}

	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->write_requests, __func__,
					     what, false);
}
4524
/* P_NEG_ACK: the peer failed to write the block.  For resync requests
 * (ID_SYNCER) just account the failure; for application writes apply
 * NEG_ACKED to the request.  Under protocol A/B the request may already
 * have been completed and removed (missing_ok); in that case mark the
 * block out of sync so it gets resynced later. */
static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
			  mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
	bool found;

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(mdev);
		drbd_rs_failed_io(mdev, sector, size);
		return true;
	}

	found = validate_req_change_req_state(mdev, p->block_id, sector,
					      &mdev->write_requests, __func__,
					      NEG_ACKED, missing_ok);
	if (!found) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		if (!missing_ok)
			return false;
		drbd_set_out_of_sync(mdev, sector, size);
	}
	return true;
}
4557
/* P_NEG_DREPLY: the peer could not service our read request; fail the
 * original application read. */
static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
	sector_t sector = be64_to_cpu(p->sector);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
			(unsigned long long)sector, be32_to_cpu(p->blksize));

	/* reads live in read_requests, not write_requests */
	return validate_req_change_req_state(mdev, p->block_id, sector,
					     &mdev->read_requests, __func__,
					     NEG_ACKED, false);
}
4572
/* Handle negative replies to resync read requests: P_NEG_RS_DREPLY
 * (peer failed the read — account the failed range) and P_RS_CANCEL
 * (peer cancelled — no failure accounting). */
static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	sector_t sector;
	int size;
	struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(mdev, be32_to_cpu(p->seq_num));

	dec_rs_pending(mdev);

	if (get_ldev_if_state(mdev, D_FAILED)) {
		drbd_rs_complete_io(mdev, sector);
		switch (cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(mdev, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			D_ASSERT(0);
			put_ldev(mdev);
			return false;
		}
		put_ldev(mdev);
	}

	return true;
}
4603
/* P_BARRIER_ACK: the peer has processed all writes up to this barrier;
 * release the corresponding section of the transfer log. */
static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;

	tl_release(mdev->tconn, p->barrier, be32_to_cpu(p->set_size));

	/* If we are in Ahead mode and no application writes are in flight,
	 * arm the timer that transitions us back to being a sync source
	 * (the flag guards against arming it more than once). */
	if (mdev->state.conn == C_AHEAD &&
	    atomic_read(&mdev->ap_in_flight) == 0 &&
	    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
		mdev->start_resync_timer.expires = jiffies + HZ;
		add_timer(&mdev->start_resync_timer);
	}

	return true;
}
4619
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004620static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004621{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004622 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004623 struct drbd_work *w;
4624 sector_t sector;
4625 int size;
4626
4627 sector = be64_to_cpu(p->sector);
4628 size = be32_to_cpu(p->blksize);
4629
4630 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4631
4632 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4633 drbd_ov_oos_found(mdev, sector, size);
4634 else
4635 ov_oos_print(mdev);
4636
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004637 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004638 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004639
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640 drbd_rs_complete_io(mdev, sector);
4641 dec_rs_pending(mdev);
4642
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004643 --mdev->ov_left;
4644
4645 /* let's advance progress step marks only for every other megabyte */
4646 if ((mdev->ov_left & 0x200) == 0x200)
4647 drbd_advance_rs_marks(mdev, mdev->ov_left);
4648
4649 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650 w = kmalloc(sizeof(*w), GFP_NOIO);
4651 if (w) {
4652 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004653 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004654 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004655 } else {
4656 dev_err(DEV, "kmalloc(w) failed.");
4657 ov_oos_print(mdev);
4658 drbd_resync_finished(mdev);
4659 }
4660 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004661 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004662 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004663}
4664
/* Handler for packets we deliberately ignore on the meta socket
 * (e.g. P_DELAY_PROBE, see asender_tbl). */
static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
{
	return true;
}
4669
/* idr_for_each() callback: drain the done_ee list of one volume.
 * Returns non-zero (stops the iteration) when processing failed. */
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = p;

	return drbd_process_done_ee(mdev) ? 0 : 1;
}
4675
/* idr_for_each() callback: report whether this volume still has entries
 * on its done_ee list (read under req_lock). */
static int _check_ee_empty(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	struct drbd_tconn *tconn = mdev->tconn;
	int not_empty;

	spin_lock_irq(&tconn->req_lock);
	not_empty = !list_empty(&mdev->done_ee);
	spin_unlock_irq(&tconn->req_lock);

	return not_empty;
}
4688
/* Process the done_ee lists of all volumes of this connection.
 * SIGNAL_ASENDER is cleared (and pending signals flushed) before each
 * processing pass so we are not interrupted mid-work, and re-set before
 * re-checking emptiness; loop until every volume's list is empty.
 * Returns 0 on success or the error from processing. */
static int tconn_process_done_ee(struct drbd_tconn *tconn)
{
	int not_empty, err;

	do {
		clear_bit(SIGNAL_ASENDER, &tconn->flags);
		flush_signals(current);
		err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
		if (err)
			return err;
		set_bit(SIGNAL_ASENDER, &tconn->flags);
		not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
	} while (not_empty);

	return 0;
}
4705
/* Dispatch table entry for the acknowledge receiver (asender) thread. */
struct asender_cmd {
	size_t pkt_size;	/* expected on-the-wire size of this packet */
	int (*process)(struct drbd_conf *, enum drbd_packet);	/* handler */
};

/* Indexed by packet command; unlisted commands are invalid on the
 * meta socket. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { sizeof(struct p_header), got_Ping },
	[P_PING_ACK]	    = { sizeof(struct p_header), got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_DISCARD_WRITE]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply},
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply},
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
4730
Philipp Reisnerb411b362009-09-25 16:07:19 -07004731int drbd_asender(struct drbd_thread *thi)
4732{
Philipp Reisner392c8802011-02-09 10:33:31 +01004733 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004734 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004736 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004737 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004738 void *buf = h;
4739 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004740 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004741 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004742
Philipp Reisnerb411b362009-09-25 16:07:19 -07004743 current->policy = SCHED_RR; /* Make this a realtime task! */
4744 current->rt_priority = 2; /* more important than all other tasks */
4745
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004746 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004747 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004748 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004749 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004750 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004751 goto reconnect;
4752 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004753 tconn->meta.socket->sk->sk_rcvtimeo =
4754 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004755 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004756 }
4757
Philipp Reisner32862ec2011-02-08 16:41:01 +01004758 /* TODO: conditionally cork; it may hurt latency if we cork without
4759 much to send */
4760 if (!tconn->net_conf->no_cork)
4761 drbd_tcp_cork(tconn->meta.socket);
4762 if (tconn_process_done_ee(tconn))
4763 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004764 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004765 if (!tconn->net_conf->no_cork)
4766 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004767
4768 /* short circuit, recv_msg would return EINTR anyways. */
4769 if (signal_pending(current))
4770 continue;
4771
Philipp Reisner32862ec2011-02-08 16:41:01 +01004772 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4773 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004774
4775 flush_signals(current);
4776
4777 /* Note:
4778 * -EINTR (on meta) we got a signal
4779 * -EAGAIN (on meta) rcvtimeo expired
4780 * -ECONNRESET other side closed the connection
4781 * -ERESTARTSYS (on data) we got a signal
4782 * rv < 0 other than above: unexpected error!
4783 * rv == expected: full header or command
4784 * rv < expected: "woken" by signal during receive
4785 * rv == 0 : "connection shut down by peer"
4786 */
4787 if (likely(rv > 0)) {
4788 received += rv;
4789 buf += rv;
4790 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004791 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004792 goto reconnect;
4793 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004794 /* If the data socket received something meanwhile,
4795 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004796 if (time_after(tconn->last_received,
4797 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004798 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004799 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004800 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004801 goto reconnect;
4802 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004803 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004804 continue;
4805 } else if (rv == -EINTR) {
4806 continue;
4807 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004808 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004809 goto reconnect;
4810 }
4811
4812 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004813 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004814 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004815 cmd = &asender_tbl[pi.cmd];
4816 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004817 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004818 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004819 goto disconnect;
4820 }
4821 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004822 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004823 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004824 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004825 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004826 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004827 }
4828 if (received == expect) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004829 tconn->last_received = jiffies;
4830 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004831 goto reconnect;
4832
Lars Ellenbergf36af182011-03-09 22:44:55 +01004833 /* the idle_timeout (ping-int)
4834 * has been restored in got_PingAck() */
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01004835 if (cmd == &asender_tbl[P_PING_ACK])
Lars Ellenbergf36af182011-03-09 22:44:55 +01004836 ping_timeout_active = 0;
4837
Philipp Reisnerb411b362009-09-25 16:07:19 -07004838 buf = h;
4839 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004840 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004841 cmd = NULL;
4842 }
4843 }
4844
4845 if (0) {
4846reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004847 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848 }
4849 if (0) {
4850disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004851 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004852 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004853 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004854
Philipp Reisner32862ec2011-02-08 16:41:01 +01004855 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004856
4857 return 0;
4858}