blob: 112098bc4c8c353c028bcedebdfd30683f0bf0a1 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070047#include "drbd_req.h"
48
49#include "drbd_vli.h"
50
Philipp Reisner77351055b2011-02-07 17:24:26 +010051struct packet_info {
52 enum drbd_packet cmd;
53 int size;
54 int vnr;
55};
56
/* Result type of drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE = 0,
	FE_DESTROYED,
	FE_RECYCLED,
};
62
/* Forward declarations of file-local (static) functions. */
static int drbd_do_handshake(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev);
static int e_end_block(struct drbd_work *w, int cancel);

/* GFP flags used for the opportunistic page allocation in
 * drbd_pp_first_pages_or_try_alloc(). */
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)
72
/*
 * Some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */
77
78/* If at least n pages are linked at head, get n pages off.
79 * Otherwise, don't modify head, and return NULL.
80 * Locking is the responsibility of the caller.
81 */
82static struct page *page_chain_del(struct page **head, int n)
83{
84 struct page *page;
85 struct page *tmp;
86
87 BUG_ON(!n);
88 BUG_ON(!head);
89
90 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020091
92 if (!page)
93 return NULL;
94
Lars Ellenberg45bb9122010-05-14 17:10:48 +020095 while (page) {
96 tmp = page_chain_next(page);
97 if (--n == 0)
98 break; /* found sufficient pages */
99 if (tmp == NULL)
100 /* insufficient pages, don't use any of them. */
101 return NULL;
102 page = tmp;
103 }
104
105 /* add end of list marker for the returned list */
106 set_page_private(page, 0);
107 /* actual return value, and adjustment of head */
108 page = *head;
109 *head = tmp;
110 return page;
111}
112
113/* may be used outside of locks to find the tail of a (usually short)
114 * "private" page chain, before adding it back to a global chain head
115 * with page_chain_add() under a spinlock. */
116static struct page *page_chain_tail(struct page *page, int *len)
117{
118 struct page *tmp;
119 int i = 1;
120 while ((tmp = page_chain_next(page)))
121 ++i, page = tmp;
122 if (len)
123 *len = i;
124 return page;
125}
126
127static int page_chain_free(struct page *page)
128{
129 struct page *tmp;
130 int i = 0;
131 page_chain_for_each_safe(page, tmp) {
132 put_page(page);
133 ++i;
134 }
135 return i;
136}
137
138static void page_chain_add(struct page **head,
139 struct page *chain_first, struct page *chain_last)
140{
141#if 1
142 struct page *tmp;
143 tmp = page_chain_tail(chain_first, NULL);
144 BUG_ON(tmp != chain_last);
145#endif
146
147 /* add chain to head */
148 set_page_private(chain_last, (unsigned long)*head);
149 *head = chain_first;
150}
151
152static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700153{
154 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200155 struct page *tmp = NULL;
156 int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700157
158 /* Yes, testing drbd_pp_vacant outside the lock is racy.
159 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200160 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700161 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200162 page = page_chain_del(&drbd_pp_pool, number);
163 if (page)
164 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700165 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200166 if (page)
167 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700168 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200169
Philipp Reisnerb411b362009-09-25 16:07:19 -0700170 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
171 * "criss-cross" setup, that might cause write-out on some other DRBD,
172 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200173 for (i = 0; i < number; i++) {
174 tmp = alloc_page(GFP_TRY);
175 if (!tmp)
176 break;
177 set_page_private(tmp, (unsigned long)page);
178 page = tmp;
179 }
180
181 if (i == number)
182 return page;
183
184 /* Not enough pages immediately available this time.
185 * No need to jump around here, drbd_pp_alloc will retry this
186 * function "soon". */
187 if (page) {
188 tmp = page_chain_tail(page, NULL);
189 spin_lock(&drbd_pp_lock);
190 page_chain_add(&drbd_pp_pool, page, tmp);
191 drbd_pp_vacant += i;
192 spin_unlock(&drbd_pp_lock);
193 }
194 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700195}
196
Philipp Reisnerb411b362009-09-25 16:07:19 -0700197static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
198{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100199 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700200 struct list_head *le, *tle;
201
202 /* The EEs are always appended to the end of the list. Since
203 they are sent in order over the wire, they have to finish
204 in order. As soon as we see the first not finished we can
205 stop to examine the list... */
206
207 list_for_each_safe(le, tle, &mdev->net_ee) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100208 peer_req = list_entry(le, struct drbd_peer_request, w.list);
209 if (drbd_ee_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700210 break;
211 list_move(le, to_be_freed);
212 }
213}
214
215static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
216{
217 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100218 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700219
Philipp Reisner87eeee42011-01-19 14:16:30 +0100220 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700221 reclaim_net_ee(mdev, &reclaimed);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100222 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700223
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100224 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
225 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226}
227
228/**
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200229 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700230 * @mdev: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200231 * @number: number of pages requested
232 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700233 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * Tries to allocate number pages, first from our own page pool, then from
235 * the kernel, unless this allocation would exceed the max_buffers setting.
236 * Possibly retry until DRBD frees sufficient pages somewhere else.
237 *
238 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700239 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200240static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700241{
242 struct page *page = NULL;
243 DEFINE_WAIT(wait);
244
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200245 /* Yes, we may run up to @number over max_buffers. If we
246 * follow it strictly, the admin will get it wrong anyways. */
Philipp Reisner89e58e72011-01-19 13:12:45 +0100247 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200248 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700249
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200250 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700251 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
252
253 drbd_kick_lo_and_reclaim_net(mdev);
254
Philipp Reisner89e58e72011-01-19 13:12:45 +0100255 if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200256 page = drbd_pp_first_pages_or_try_alloc(mdev, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257 if (page)
258 break;
259 }
260
261 if (!retry)
262 break;
263
264 if (signal_pending(current)) {
265 dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
266 break;
267 }
268
269 schedule();
270 }
271 finish_wait(&drbd_pp_wait, &wait);
272
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200273 if (page)
274 atomic_add(number, &mdev->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700275 return page;
276}
277
278/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
Philipp Reisner87eeee42011-01-19 14:16:30 +0100279 * Is also used from inside an other spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200280 * Either links the page chain back to the global pool,
281 * or returns all pages to the system. */
Lars Ellenberg435f0742010-09-06 12:30:25 +0200282static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700283{
Lars Ellenberg435f0742010-09-06 12:30:25 +0200284 atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700285 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200286
Lars Ellenberg1816a2b2010-11-11 15:19:07 +0100287 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200288 i = page_chain_free(page);
289 else {
290 struct page *tmp;
291 tmp = page_chain_tail(page, &i);
292 spin_lock(&drbd_pp_lock);
293 page_chain_add(&drbd_pp_pool, page, tmp);
294 drbd_pp_vacant += i;
295 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700296 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200297 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200298 if (i < 0)
Lars Ellenberg435f0742010-09-06 12:30:25 +0200299 dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
300 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700301 wake_up(&drbd_pp_wait);
302}
303
304/*
305You need to hold the req_lock:
306 _drbd_wait_ee_list_empty()
307
308You must not have the req_lock:
309 drbd_free_ee()
310 drbd_alloc_ee()
311 drbd_init_ee()
312 drbd_release_ee()
313 drbd_ee_fix_bhs()
314 drbd_process_done_ee()
315 drbd_clear_done_ee()
316 drbd_wait_ee_list_empty()
317*/
318
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100319struct drbd_peer_request *
320drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
321 unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700322{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100323 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700324 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200325 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700326
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +0100327 if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700328 return NULL;
329
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100330 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
331 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700332 if (!(gfp_mask & __GFP_NOWARN))
333 dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
334 return NULL;
335 }
336
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200337 page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
338 if (!page)
339 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700340
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100341 drbd_clear_interval(&peer_req->i);
342 peer_req->i.size = data_size;
343 peer_req->i.sector = sector;
344 peer_req->i.local = false;
345 peer_req->i.waiting = false;
Andreas Gruenbacher53840642011-01-28 10:31:04 +0100346
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100347 peer_req->epoch = NULL;
Philipp Reisnera21e9292011-02-08 15:08:49 +0100348 peer_req->w.mdev = mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100349 peer_req->pages = page;
350 atomic_set(&peer_req->pending_bios, 0);
351 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100352 /*
353 * The block_id is opaque to the receiver. It is not endianness
354 * converted, and sent back to the sender unchanged.
355 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100356 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700357
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100358 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700359
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200360 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100361 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700362 return NULL;
363}
364
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100365void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100366 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700367{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100368 if (peer_req->flags & EE_HAS_DIGEST)
369 kfree(peer_req->digest);
370 drbd_pp_free(mdev, peer_req->pages, is_net);
371 D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
372 D_ASSERT(drbd_interval_empty(&peer_req->i));
373 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700374}
375
376int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
377{
378 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100379 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700380 int count = 0;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200381 int is_net = list == &mdev->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700382
Philipp Reisner87eeee42011-01-19 14:16:30 +0100383 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700384 list_splice_init(list, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100385 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700386
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100387 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
388 drbd_free_some_ee(mdev, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700389 count++;
390 }
391 return count;
392}
393
394
Philipp Reisner32862ec2011-02-08 16:41:01 +0100395/* See also comments in _req_mod(,BARRIER_ACKED)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700396 * and receive_Barrier.
397 *
398 * Move entries from net_ee to done_ee, if ready.
399 * Grab done_ee, call all callbacks, free the entries.
400 * The callbacks typically send out ACKs.
401 */
402static int drbd_process_done_ee(struct drbd_conf *mdev)
403{
404 LIST_HEAD(work_list);
405 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100406 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700407 int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);
408
Philipp Reisner87eeee42011-01-19 14:16:30 +0100409 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410 reclaim_net_ee(mdev, &reclaimed);
411 list_splice_init(&mdev->done_ee, &work_list);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100412 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100414 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
415 drbd_free_net_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700416
417 /* possible callbacks here:
418 * e_end_block, and e_end_resync_block, e_send_discard_ack.
419 * all ignore the last argument.
420 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100421 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700422 /* list_del not necessary, next/prev members not touched */
Philipp Reisner00d56942011-02-09 18:09:48 +0100423 ok = peer_req->w.cb(&peer_req->w, !ok) && ok;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100424 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700425 }
426 wake_up(&mdev->ee_wait);
427
428 return ok;
429}
430
431void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
432{
433 DEFINE_WAIT(wait);
434
435 /* avoids spin_lock/unlock
436 * and calling prepare_to_wait in the fast path */
437 while (!list_empty(head)) {
438 prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100439 spin_unlock_irq(&mdev->tconn->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100440 io_schedule();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700441 finish_wait(&mdev->ee_wait, &wait);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100442 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700443 }
444}
445
446void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
447{
Philipp Reisner87eeee42011-01-19 14:16:30 +0100448 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700449 _drbd_wait_ee_list_empty(mdev, head);
Philipp Reisner87eeee42011-01-19 14:16:30 +0100450 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700451}
452
453/* see also kernel_accept; which is only present since 2.6.18.
454 * also we want to log which part of it failed, exactly */
Philipp Reisner76536202011-02-07 14:09:54 +0100455static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700456{
457 struct sock *sk = sock->sk;
458 int err = 0;
459
460 *what = "listen";
461 err = sock->ops->listen(sock, 5);
462 if (err < 0)
463 goto out;
464
465 *what = "sock_create_lite";
466 err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
467 newsock);
468 if (err < 0)
469 goto out;
470
471 *what = "accept";
472 err = sock->ops->accept(sock, *newsock, 0);
473 if (err < 0) {
474 sock_release(*newsock);
475 *newsock = NULL;
476 goto out;
477 }
478 (*newsock)->ops = sock->ops;
479
480out:
481 return err;
482}
483
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100484static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700485{
486 mm_segment_t oldfs;
487 struct kvec iov = {
488 .iov_base = buf,
489 .iov_len = size,
490 };
491 struct msghdr msg = {
492 .msg_iovlen = 1,
493 .msg_iov = (struct iovec *)&iov,
494 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
495 };
496 int rv;
497
498 oldfs = get_fs();
499 set_fs(KERNEL_DS);
500 rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
501 set_fs(oldfs);
502
503 return rv;
504}
505
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100506static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700507{
508 mm_segment_t oldfs;
509 struct kvec iov = {
510 .iov_base = buf,
511 .iov_len = size,
512 };
513 struct msghdr msg = {
514 .msg_iovlen = 1,
515 .msg_iov = (struct iovec *)&iov,
516 .msg_flags = MSG_WAITALL | MSG_NOSIGNAL
517 };
518 int rv;
519
520 oldfs = get_fs();
521 set_fs(KERNEL_DS);
522
523 for (;;) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100524 rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700525 if (rv == size)
526 break;
527
528 /* Note:
529 * ECONNRESET other side closed the connection
530 * ERESTARTSYS (on sock) we got a signal
531 */
532
533 if (rv < 0) {
534 if (rv == -ECONNRESET)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100535 conn_info(tconn, "sock was reset by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700536 else if (rv != -ERESTARTSYS)
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100537 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700538 break;
539 } else if (rv == 0) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +0100540 conn_info(tconn, "sock was shut down by peer\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700541 break;
542 } else {
543 /* signal came in, or peer/link went down,
544 * after we read a partial message
545 */
546 /* D_ASSERT(signal_pending(current)); */
547 break;
548 }
549 };
550
551 set_fs(oldfs);
552
553 if (rv != size)
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100554 conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700555
556 return rv;
557}
558
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200559/* quoting tcp(7):
560 * On individual connections, the socket buffer size must be set prior to the
561 * listen(2) or connect(2) calls in order to have it take effect.
562 * This is our wrapper to do so.
563 */
564static void drbd_setbufsize(struct socket *sock, unsigned int snd,
565 unsigned int rcv)
566{
567 /* open coded SO_SNDBUF, SO_RCVBUF */
568 if (snd) {
569 sock->sk->sk_sndbuf = snd;
570 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
571 }
572 if (rcv) {
573 sock->sk->sk_rcvbuf = rcv;
574 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
575 }
576}
577
/*
 * Make one attempt at an outgoing TCP connection to the configured peer
 * address, bound to the configured local address (with port 0).
 *
 * Returns the connected socket, or NULL.  Failures of the connect step and
 * the "peer not there yet" class of errors are silent; any other error is
 * logged, and if it happened before the connect step the connection is
 * forced to C_DISCONNECTING.
 */
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	struct sockaddr_in6 src_in6;
	struct sockaddr *my_addr;
	struct socket *sock;
	const char *what;
	int disconnect_on_error = 1;
	int err;

	if (!get_net_conf(tconn))
		return NULL;

	my_addr = (struct sockaddr *)tconn->net_conf->my_addr;

	what = "sock_create_kern";
	err = sock_create_kern(my_addr->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	sock->sk->sk_rcvtimeo = sock->sk->sk_sndtimeo;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (my_addr->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT:
		case EAGAIN:
		case EINPROGRESS:
		case EINTR:
		case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED:
		case ENETUNREACH:
		case EHOSTDOWN:
		case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	put_net_conf(tconn);
	return sock;
}
655
Philipp Reisner76536202011-02-07 14:09:54 +0100656static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700657{
658 int timeo, err;
659 struct socket *s_estab = NULL, *s_listen;
660 const char *what;
661
Philipp Reisner76536202011-02-07 14:09:54 +0100662 if (!get_net_conf(tconn))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700663 return NULL;
664
665 what = "sock_create_kern";
Philipp Reisner76536202011-02-07 14:09:54 +0100666 err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700667 SOCK_STREAM, IPPROTO_TCP, &s_listen);
668 if (err) {
669 s_listen = NULL;
670 goto out;
671 }
672
Philipp Reisner76536202011-02-07 14:09:54 +0100673 timeo = tconn->net_conf->try_connect_int * HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700674 timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
675
676 s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
677 s_listen->sk->sk_rcvtimeo = timeo;
678 s_listen->sk->sk_sndtimeo = timeo;
Philipp Reisner76536202011-02-07 14:09:54 +0100679 drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
680 tconn->net_conf->rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700681
682 what = "bind before listen";
683 err = s_listen->ops->bind(s_listen,
Philipp Reisner76536202011-02-07 14:09:54 +0100684 (struct sockaddr *) tconn->net_conf->my_addr,
685 tconn->net_conf->my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700686 if (err < 0)
687 goto out;
688
Philipp Reisner76536202011-02-07 14:09:54 +0100689 err = drbd_accept(&what, s_listen, &s_estab);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700690
691out:
692 if (s_listen)
693 sock_release(s_listen);
694 if (err < 0) {
695 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Philipp Reisner76536202011-02-07 14:09:54 +0100696 conn_err(tconn, "%s failed, err = %d\n", what, err);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100697 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700698 }
699 }
Philipp Reisner76536202011-02-07 14:09:54 +0100700 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700701
702 return s_estab;
703}
704
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100705static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706{
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100707 struct p_header *h = &tconn->data.sbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708
Philipp Reisnerd38e7872011-02-07 15:32:04 +0100709 return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700710}
711
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100712static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700713{
Philipp Reisnera25b63f2011-02-07 15:43:45 +0100714 struct p_header80 *h = &tconn->data.rbuf.header.h80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715 int rr;
716
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100717 rr = drbd_recv_short(sock, h, sizeof(*h), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700718
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100719 if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700720 return be16_to_cpu(h->command);
721
722 return 0xffff;
723}
724
725/**
726 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700727 * @sock: pointer to the pointer to the socket.
728 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100729static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700730{
731 int rr;
732 char tb[4];
733
734 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100735 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700736
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100737 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700738
739 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100740 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700741 } else {
742 sock_release(*sock);
743 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100744 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700745 }
746}
747
Philipp Reisner907599e2011-02-08 11:25:37 +0100748static int drbd_connected(int vnr, void *p, void *data)
749{
750 struct drbd_conf *mdev = (struct drbd_conf *)p;
751 int ok = 1;
752
753 atomic_set(&mdev->packet_seq, 0);
754 mdev->peer_seq = 0;
755
Philipp Reisner8410da82011-02-11 20:11:10 +0100756 mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
757 &mdev->tconn->cstate_mutex :
758 &mdev->own_state_mutex;
759
Philipp Reisner907599e2011-02-08 11:25:37 +0100760 ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
761 ok &= drbd_send_sizes(mdev, 0, 0);
762 ok &= drbd_send_uuids(mdev);
763 ok &= drbd_send_state(mdev);
764 clear_bit(USE_DEGR_WFC_T, &mdev->flags);
765 clear_bit(RESIZE_PENDING, &mdev->flags);
766
Philipp Reisner8410da82011-02-11 20:11:10 +0100767
Philipp Reisner907599e2011-02-08 11:25:37 +0100768 return !ok;
769}
770
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771/*
772 * return values:
773 * 1 yes, we have a valid connection
774 * 0 oops, did not work out, please try again
775 * -1 peer talks different language,
776 * no point in trying again, please go standalone.
777 * -2 We do not have a network config...
778 */
Philipp Reisner907599e2011-02-08 11:25:37 +0100779static int drbd_connect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780{
781 struct socket *s, *sock, *msock;
782 int try, h, ok;
783
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100784 if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700785 return -2;
786
Philipp Reisner907599e2011-02-08 11:25:37 +0100787 clear_bit(DISCARD_CONCURRENT, &tconn->flags);
788 tconn->agreed_pro_version = 99;
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100789 /* agreed_pro_version must be smaller than 100 so we send the old
790 header (h80) in the first packet and in the handshake packet. */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700791
792 sock = NULL;
793 msock = NULL;
794
795 do {
796 for (try = 0;;) {
797 /* 3 tries, this should take less than a second! */
Philipp Reisner907599e2011-02-08 11:25:37 +0100798 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700799 if (s || ++try >= 3)
800 break;
801 /* give the other side time to call bind() & listen() */
Philipp Reisner20ee6392011-01-18 15:28:59 +0100802 schedule_timeout_interruptible(HZ / 10);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700803 }
804
805 if (s) {
806 if (!sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100807 drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700808 sock = s;
809 s = NULL;
810 } else if (!msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100811 drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700812 msock = s;
813 s = NULL;
814 } else {
Philipp Reisner907599e2011-02-08 11:25:37 +0100815 conn_err(tconn, "Logic error in drbd_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700816 goto out_release_sockets;
817 }
818 }
819
820 if (sock && msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100821 schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100822 ok = drbd_socket_okay(&sock);
823 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700824 if (ok)
825 break;
826 }
827
828retry:
Philipp Reisner907599e2011-02-08 11:25:37 +0100829 s = drbd_wait_for_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700830 if (s) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100831 try = drbd_recv_fp(tconn, s);
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100832 drbd_socket_okay(&sock);
833 drbd_socket_okay(&msock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700834 switch (try) {
835 case P_HAND_SHAKE_S:
836 if (sock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100837 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700838 sock_release(sock);
839 }
840 sock = s;
841 break;
842 case P_HAND_SHAKE_M:
843 if (msock) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100844 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700845 sock_release(msock);
846 }
847 msock = s;
Philipp Reisner907599e2011-02-08 11:25:37 +0100848 set_bit(DISCARD_CONCURRENT, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700849 break;
850 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100851 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700852 sock_release(s);
853 if (random32() & 1)
854 goto retry;
855 }
856 }
857
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100858 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700859 goto out_release_sockets;
860 if (signal_pending(current)) {
861 flush_signals(current);
862 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100863 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700864 goto out_release_sockets;
865 }
866
867 if (sock && msock) {
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100868 ok = drbd_socket_okay(&sock);
869 ok = drbd_socket_okay(&msock) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700870 if (ok)
871 break;
872 }
873 } while (1);
874
875 msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
876 sock->sk->sk_reuse = 1; /* SO_REUSEADDR */
877
878 sock->sk->sk_allocation = GFP_NOIO;
879 msock->sk->sk_allocation = GFP_NOIO;
880
881 sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
882 msock->sk->sk_priority = TC_PRIO_INTERACTIVE;
883
Philipp Reisnerb411b362009-09-25 16:07:19 -0700884 /* NOT YET ...
Philipp Reisner907599e2011-02-08 11:25:37 +0100885 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700886 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
887 * first set it to the P_HAND_SHAKE timeout,
888 * which we set to 4x the configured ping_timeout. */
889 sock->sk->sk_sndtimeo =
Philipp Reisner907599e2011-02-08 11:25:37 +0100890 sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700891
Philipp Reisner907599e2011-02-08 11:25:37 +0100892 msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
893 msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894
895 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -0300896 * we use TCP_CORK where appropriate, though */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700897 drbd_tcp_nodelay(sock);
898 drbd_tcp_nodelay(msock);
899
Philipp Reisner907599e2011-02-08 11:25:37 +0100900 tconn->data.socket = sock;
901 tconn->meta.socket = msock;
902 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700903
Philipp Reisner907599e2011-02-08 11:25:37 +0100904 h = drbd_do_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700905 if (h <= 0)
906 return h;
907
Philipp Reisner907599e2011-02-08 11:25:37 +0100908 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700909 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +0100910 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +0100911 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +0100912 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700913 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +0100914 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +0100915 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +0100916 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917 }
918 }
919
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100920 if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700921 return 0;
922
Philipp Reisner907599e2011-02-08 11:25:37 +0100923 sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700924 sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
925
Philipp Reisner907599e2011-02-08 11:25:37 +0100926 drbd_thread_start(&tconn->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700927
Philipp Reisner907599e2011-02-08 11:25:37 +0100928 if (drbd_send_protocol(tconn) == -1)
Philipp Reisner7e2455c2010-04-22 14:50:23 +0200929 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700930
Philipp Reisner907599e2011-02-08 11:25:37 +0100931 return !idr_for_each(&tconn->volumes, drbd_connected, tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700932
933out_release_sockets:
934 if (sock)
935 sock_release(sock);
936 if (msock)
937 sock_release(msock);
938 return -1;
939}
940
Philipp Reisnerce243852011-02-07 17:27:47 +0100941static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942{
Philipp Reisnerfd340c12011-01-19 16:57:39 +0100943 if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100944 pi->cmd = be16_to_cpu(h->h80.command);
945 pi->size = be16_to_cpu(h->h80.length);
Philipp Reisnereefc2f72011-02-08 12:55:24 +0100946 pi->vnr = 0;
Andreas Gruenbacherca9bc122011-01-11 13:47:24 +0100947 } else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
Philipp Reisner77351055b2011-02-07 17:24:26 +0100948 pi->cmd = be16_to_cpu(h->h95.command);
949 pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
950 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +0200951 } else {
Philipp Reisnerce243852011-02-07 17:27:47 +0100952 conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
Lars Ellenberg004352f2010-10-05 20:13:58 +0200953 be32_to_cpu(h->h80.magic),
954 be16_to_cpu(h->h80.command),
955 be16_to_cpu(h->h80.length));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100956 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957 }
Philipp Reisner257d0af2011-01-26 12:15:29 +0100958 return true;
959}
960
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100961static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +0100962{
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100963 struct p_header *h = &tconn->data.rbuf.header;
Philipp Reisner257d0af2011-01-26 12:15:29 +0100964 int r;
965
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100966 r = drbd_recv(tconn, h, sizeof(*h));
Philipp Reisner257d0af2011-01-26 12:15:29 +0100967 if (unlikely(r != sizeof(*h))) {
968 if (!signal_pending(current))
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100969 conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
Philipp Reisner257d0af2011-01-26 12:15:29 +0100970 return false;
971 }
972
Philipp Reisner9ba7aa02011-02-07 17:32:41 +0100973 r = decode_header(tconn, h, pi);
974 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975
Philipp Reisner257d0af2011-01-26 12:15:29 +0100976 return r;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977}
978
Philipp Reisner2451fc32010-08-24 13:43:11 +0200979static void drbd_flush(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980{
981 int rv;
982
983 if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
Dmitry Monakhovfbd9b092010-04-28 17:55:06 +0400984 rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
Christoph Hellwigdd3932e2010-09-16 20:51:46 +0200985 NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986 if (rv) {
987 dev_err(DEV, "local disk flush failed with status %d\n", rv);
988 /* would rather check on EOPNOTSUPP, but that is not reliable.
989 * don't try again for ANY return value != 0
990 * if (rv == -EOPNOTSUPP) */
991 drbd_bump_write_ordering(mdev, WO_drain_io);
992 }
993 put_ldev(mdev);
994 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700995}
996
997/**
998 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
999 * @mdev: DRBD device.
1000 * @epoch: Epoch object.
1001 * @ev: Epoch event.
1002 */
1003static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
1004 struct drbd_epoch *epoch,
1005 enum epoch_event ev)
1006{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001007 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001008 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001009 enum finish_epoch rv = FE_STILL_LIVE;
1010
1011 spin_lock(&mdev->epoch_lock);
1012 do {
1013 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014
1015 epoch_size = atomic_read(&epoch->epoch_size);
1016
1017 switch (ev & ~EV_CLEANUP) {
1018 case EV_PUT:
1019 atomic_dec(&epoch->active);
1020 break;
1021 case EV_GOT_BARRIER_NR:
1022 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023 break;
1024 case EV_BECAME_LAST:
1025 /* nothing to do*/
1026 break;
1027 }
1028
Philipp Reisnerb411b362009-09-25 16:07:19 -07001029 if (epoch_size != 0 &&
1030 atomic_read(&epoch->active) == 0 &&
Philipp Reisner2451fc32010-08-24 13:43:11 +02001031 test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001032 if (!(ev & EV_CLEANUP)) {
1033 spin_unlock(&mdev->epoch_lock);
1034 drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
1035 spin_lock(&mdev->epoch_lock);
1036 }
1037 dec_unacked(mdev);
1038
1039 if (mdev->current_epoch != epoch) {
1040 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1041 list_del(&epoch->list);
1042 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
1043 mdev->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001044 kfree(epoch);
1045
1046 if (rv == FE_STILL_LIVE)
1047 rv = FE_DESTROYED;
1048 } else {
1049 epoch->flags = 0;
1050 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001051 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001052 if (rv == FE_STILL_LIVE)
1053 rv = FE_RECYCLED;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001054 wake_up(&mdev->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001055 }
1056 }
1057
1058 if (!next_epoch)
1059 break;
1060
1061 epoch = next_epoch;
1062 } while (1);
1063
1064 spin_unlock(&mdev->epoch_lock);
1065
Philipp Reisnerb411b362009-09-25 16:07:19 -07001066 return rv;
1067}
1068
1069/**
1070 * drbd_bump_write_ordering() - Fall back to an other write ordering method
1071 * @mdev: DRBD device.
1072 * @wo: Write ordering method to try.
1073 */
1074void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
1075{
1076 enum write_ordering_e pwo;
1077 static char *write_ordering_str[] = {
1078 [WO_none] = "none",
1079 [WO_drain_io] = "drain",
1080 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001081 };
1082
1083 pwo = mdev->write_ordering;
1084 wo = min(pwo, wo);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085 if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
1086 wo = WO_drain_io;
1087 if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
1088 wo = WO_none;
1089 mdev->write_ordering = wo;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001090 if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001091 dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
1092}
1093
1094/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001095 * drbd_submit_peer_request()
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001096 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001097 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001098 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001099 *
1100 * May spread the pages to multiple bios,
1101 * depending on bio_add_page restrictions.
1102 *
1103 * Returns 0 if all bios have been submitted,
1104 * -ENOMEM if we could not allocate enough bios,
1105 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1106 * single page to an empty bio (which should never happen and likely indicates
1107 * that the lower level IO stack is in some way broken). This has been observed
1108 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001109 */
1110/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001111int drbd_submit_peer_request(struct drbd_conf *mdev,
1112 struct drbd_peer_request *peer_req,
1113 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001114{
1115 struct bio *bios = NULL;
1116 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001117 struct page *page = peer_req->pages;
1118 sector_t sector = peer_req->i.sector;
1119 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001120 unsigned n_bios = 0;
1121 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001122 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001123
1124 /* In most cases, we will only need one bio. But in case the lower
1125 * level restrictions happen to be different at this offset on this
1126 * side than those of the sending peer, we may need to submit the
1127 * request in more than one bio. */
1128next_bio:
1129 bio = bio_alloc(GFP_NOIO, nr_pages);
1130 if (!bio) {
1131 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1132 goto fail;
1133 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001134 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001135 bio->bi_sector = sector;
1136 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001137 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001138 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001139 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001140
1141 bio->bi_next = bios;
1142 bios = bio;
1143 ++n_bios;
1144
1145 page_chain_for_each(page) {
1146 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1147 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001148 /* A single page must always be possible!
1149 * But in case it fails anyways,
1150 * we deal with it, and complain (below). */
1151 if (bio->bi_vcnt == 0) {
1152 dev_err(DEV,
1153 "bio_add_page failed for len=%u, "
1154 "bi_vcnt=0 (bi_sector=%llu)\n",
1155 len, (unsigned long long)bio->bi_sector);
1156 err = -ENOSPC;
1157 goto fail;
1158 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001159 goto next_bio;
1160 }
1161 ds -= len;
1162 sector += len >> 9;
1163 --nr_pages;
1164 }
1165 D_ASSERT(page == NULL);
1166 D_ASSERT(ds == 0);
1167
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001168 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001169 do {
1170 bio = bios;
1171 bios = bios->bi_next;
1172 bio->bi_next = NULL;
1173
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001174 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001175 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001176 return 0;
1177
1178fail:
1179 while (bios) {
1180 bio = bios;
1181 bios = bios->bi_next;
1182 bio_put(bio);
1183 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001184 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001185}
1186
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001187static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001188 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001189{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001190 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001191
1192 drbd_remove_interval(&mdev->write_requests, i);
1193 drbd_clear_interval(i);
1194
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001195 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001196 if (i->waiting)
1197 wake_up(&mdev->misc_wait);
1198}
1199
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001200static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
1201 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001202{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001203 int rv;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001204 struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 struct drbd_epoch *epoch;
1206
Philipp Reisnerb411b362009-09-25 16:07:19 -07001207 inc_unacked(mdev);
1208
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 mdev->current_epoch->barrier_nr = p->barrier;
1210 rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);
1211
1212 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1213 * the activity log, which means it would not be resynced in case the
1214 * R_PRIMARY crashes now.
1215 * Therefore we must send the barrier_ack after the barrier request was
1216 * completed. */
1217 switch (mdev->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218 case WO_none:
1219 if (rv == FE_RECYCLED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001220 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001221
1222 /* receiver context, in the writeout path of the other node.
1223 * avoid potential distributed deadlock */
1224 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1225 if (epoch)
1226 break;
1227 else
1228 dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
1229 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
1231 case WO_bdev_flush:
1232 case WO_drain_io:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001234 drbd_flush(mdev);
1235
1236 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1237 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1238 if (epoch)
1239 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240 }
1241
Philipp Reisner2451fc32010-08-24 13:43:11 +02001242 epoch = mdev->current_epoch;
1243 wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);
1244
1245 D_ASSERT(atomic_read(&epoch->active) == 0);
1246 D_ASSERT(epoch->flags == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001247
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001248 return true;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001249 default:
1250 dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001251 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001252 }
1253
1254 epoch->flags = 0;
1255 atomic_set(&epoch->epoch_size, 0);
1256 atomic_set(&epoch->active, 0);
1257
1258 spin_lock(&mdev->epoch_lock);
1259 if (atomic_read(&mdev->current_epoch->epoch_size)) {
1260 list_add(&epoch->list, &mdev->current_epoch->list);
1261 mdev->current_epoch = epoch;
1262 mdev->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001263 } else {
1264 /* The current_epoch got recycled while we allocated this one... */
1265 kfree(epoch);
1266 }
1267 spin_unlock(&mdev->epoch_lock);
1268
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001269 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270}
1271
1272/* used from receive_RSDataReply (recv_resync_read)
1273 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001274static struct drbd_peer_request *
1275read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1276 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001277{
Lars Ellenberg66660322010-04-06 12:15:04 +02001278 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001279 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001280 struct page *page;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001281 int dgs, ds, rr;
Philipp Reisnera0638452011-01-19 14:31:32 +01001282 void *dig_in = mdev->tconn->int_dig_in;
1283 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001284 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001285
Philipp Reisnera0638452011-01-19 14:31:32 +01001286 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1287 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001288
1289 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001290 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001292 if (!signal_pending(current))
1293 dev_warn(DEV,
1294 "short read receiving data digest: read %d expected %d\n",
1295 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001296 return NULL;
1297 }
1298 }
1299
1300 data_size -= dgs;
1301
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001302 if (!expect(data_size != 0))
1303 return NULL;
1304 if (!expect(IS_ALIGNED(data_size, 512)))
1305 return NULL;
1306 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1307 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001308
Lars Ellenberg66660322010-04-06 12:15:04 +02001309 /* even though we trust out peer,
1310 * we sometimes have to double check. */
1311 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001312 dev_err(DEV, "request from peer beyond end of local disk: "
1313 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001314 (unsigned long long)capacity,
1315 (unsigned long long)sector, data_size);
1316 return NULL;
1317 }
1318
Philipp Reisnerb411b362009-09-25 16:07:19 -07001319 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1320 * "criss-cross" setup, that might cause write-out on some other DRBD,
1321 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001322 peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
1323 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001324 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001325
Philipp Reisnerb411b362009-09-25 16:07:19 -07001326 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001327 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001328 page_chain_for_each(page) {
1329 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001330 data = kmap(page);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001331 rr = drbd_recv(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001332 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001333 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1334 data[0] = data[0] ^ (unsigned long)-1;
1335 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001336 kunmap(page);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001337 if (rr != len) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001338 drbd_free_ee(mdev, peer_req);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001339 if (!signal_pending(current))
1340 dev_warn(DEV, "short read receiving data: read %d expected %d\n",
1341 rr, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001342 return NULL;
1343 }
1344 ds -= rr;
1345 }
1346
1347 if (dgs) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001348 drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001349 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001350 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1351 (unsigned long long)sector, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001352 drbd_bcast_ee(mdev, "digest failed",
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001353 dgs, dig_in, dig_vv, peer_req);
1354 drbd_free_ee(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001355 return NULL;
1356 }
1357 }
1358 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001359 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001360}
1361
1362/* drbd_drain_block() just takes a data block
1363 * out of the socket input buffer, and discards it.
1364 */
1365static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1366{
1367 struct page *page;
1368 int rr, rv = 1;
1369 void *data;
1370
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001371 if (!data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001372 return true;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001373
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001374 page = drbd_pp_alloc(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375
1376 data = kmap(page);
1377 while (data_size) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001378 rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001379 if (rr != min_t(int, data_size, PAGE_SIZE)) {
1380 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001381 if (!signal_pending(current))
1382 dev_warn(DEV,
1383 "short read receiving data: read %d expected %d\n",
1384 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 break;
1386 }
1387 data_size -= rr;
1388 }
1389 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001390 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391 return rv;
1392}
1393
1394static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1395 sector_t sector, int data_size)
1396{
1397 struct bio_vec *bvec;
1398 struct bio *bio;
1399 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001400 void *dig_in = mdev->tconn->int_dig_in;
1401 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001402
Philipp Reisnera0638452011-01-19 14:31:32 +01001403 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1404 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001405
1406 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001407 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001409 if (!signal_pending(current))
1410 dev_warn(DEV,
1411 "short read receiving data reply digest: read %d expected %d\n",
1412 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001413 return 0;
1414 }
1415 }
1416
1417 data_size -= dgs;
1418
1419 /* optimistically update recv_cnt. if receiving fails below,
1420 * we disconnect anyways, and counters will be reset. */
1421 mdev->recv_cnt += data_size>>9;
1422
1423 bio = req->master_bio;
1424 D_ASSERT(sector == bio->bi_sector);
1425
1426 bio_for_each_segment(bvec, bio, i) {
1427 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001428 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001429 kmap(bvec->bv_page)+bvec->bv_offset,
1430 expect);
1431 kunmap(bvec->bv_page);
1432 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001433 if (!signal_pending(current))
1434 dev_warn(DEV, "short read receiving data reply: "
1435 "read %d expected %d\n",
1436 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 return 0;
1438 }
1439 data_size -= rr;
1440 }
1441
1442 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001443 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444 if (memcmp(dig_in, dig_vv, dgs)) {
1445 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1446 return 0;
1447 }
1448 }
1449
1450 D_ASSERT(data_size == 0);
1451 return 1;
1452}
1453
1454/* e_end_resync_block() is called via
1455 * drbd_process_done_ee() by asender only */
Philipp Reisner00d56942011-02-09 18:09:48 +01001456static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001458 struct drbd_peer_request *peer_req =
1459 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001460 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001461 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001462 int ok;
1463
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001464 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001466 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1467 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1468 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469 } else {
1470 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001471 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001473 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001474 }
1475 dec_unacked(mdev);
1476
1477 return ok;
1478}
1479
1480static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1481{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001482 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001484 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1485 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001486 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487
1488 dec_rs_pending(mdev);
1489
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 inc_unacked(mdev);
1491 /* corresponding dec_unacked() in e_end_resync_block()
1492 * respective _drbd_clear_done_ee */
1493
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001494 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001495
Philipp Reisner87eeee42011-01-19 14:16:30 +01001496 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001497 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001498 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001500 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001501 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001502 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001504 /* don't care for the reason here */
1505 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001506 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001507 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001508 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001509
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001510 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001511fail:
1512 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001513 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514}
1515
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001516static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001517find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1518 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001519{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001520 struct drbd_request *req;
1521
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001522 /* Request object according to our peer */
1523 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001524 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001525 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001526 if (!missing_ok) {
1527 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1528 (unsigned long)id, (unsigned long long)sector);
1529 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001530 return NULL;
1531}
1532
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001533static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1534 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001535{
1536 struct drbd_request *req;
1537 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001539 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001540
1541 sector = be64_to_cpu(p->sector);
1542
Philipp Reisner87eeee42011-01-19 14:16:30 +01001543 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001544 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001545 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001546 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001547 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548
Bart Van Assche24c48302011-05-21 18:32:29 +02001549 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001550 * special casing it there for the various failure cases.
1551 * still no race with drbd_fail_pending_reads */
1552 ok = recv_dless_read(mdev, req, sector, data_size);
1553
1554 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001555 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001556 /* else: nothing. handled from drbd_disconnect...
1557 * I don't think we may complete this just yet
1558 * in case we are "on-disconnect: freeze" */
1559
1560 return ok;
1561}
1562
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001563static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1564 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565{
1566 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001568 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569
1570 sector = be64_to_cpu(p->sector);
1571 D_ASSERT(p->block_id == ID_SYNCER);
1572
1573 if (get_ldev(mdev)) {
1574 /* data is submitted to disk within recv_resync_read.
1575 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001576 * or in drbd_peer_request_endio. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577 ok = recv_resync_read(mdev, sector, data_size);
1578 } else {
1579 if (__ratelimit(&drbd_ratelimit_state))
1580 dev_err(DEV, "Can not write resync data to local disk.\n");
1581
1582 ok = drbd_drain_block(mdev, data_size);
1583
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001584 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001585 }
1586
Philipp Reisner778f2712010-07-06 11:14:00 +02001587 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1588
Philipp Reisnerb411b362009-09-25 16:07:19 -07001589 return ok;
1590}
1591
1592/* e_end_block() is called via drbd_process_done_ee().
1593 * this means this function only runs in the asender thread
1594 */
Philipp Reisner00d56942011-02-09 18:09:48 +01001595static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001597 struct drbd_peer_request *peer_req =
1598 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001599 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001600 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601 int ok = 1, pcmd;
1602
Philipp Reisner89e58e72011-01-19 13:12:45 +01001603 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001604 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1606 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001607 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001608 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001609 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001610 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001611 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001612 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001613 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001614 /* we expect it to be marked out of sync anyways...
1615 * maybe assert this? */
1616 }
1617 dec_unacked(mdev);
1618 }
1619 /* we delete from the conflict detection hash _after_ we sent out the
1620 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001621 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001622 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001623 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1624 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001625 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001626 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001627 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001629 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630
1631 return ok;
1632}
1633
Philipp Reisner00d56942011-02-09 18:09:48 +01001634static int e_send_discard_ack(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001635{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001636 struct drbd_peer_request *peer_req =
1637 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001638 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001639 int ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001640
Philipp Reisner89e58e72011-01-19 13:12:45 +01001641 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001642 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001643 dec_unacked(mdev);
1644
1645 return ok;
1646}
1647
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001648static bool seq_greater(u32 a, u32 b)
1649{
1650 /*
1651 * We assume 32-bit wrap-around here.
1652 * For 24-bit wrap-around, we would have to shift:
1653 * a <<= 8; b <<= 8;
1654 */
1655 return (s32)a - (s32)b > 0;
1656}
1657
1658static u32 seq_max(u32 a, u32 b)
1659{
1660 return seq_greater(a, b) ? a : b;
1661}
1662
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001663static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001664{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001665 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001666
1667 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001668 old_peer_seq = mdev->peer_seq;
1669 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001670 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001671 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001672 wake_up(&mdev->seq_wait);
1673}
1674
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675/* Called from receive_Data.
1676 * Synchronize packets on sock with packets on msock.
1677 *
1678 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1679 * packet traveling on msock, they are still processed in the order they have
1680 * been sent.
1681 *
1682 * Note: we don't care for Ack packets overtaking P_DATA packets.
1683 *
1684 * In case packet_seq is larger than mdev->peer_seq number, there are
1685 * outstanding packets on the msock. We wait for them to arrive.
1686 * In case we are the logically next packet, we update mdev->peer_seq
1687 * ourselves. Correctly handles 32bit wrap around.
1688 *
1689 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1690 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1691 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1692 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1693 *
1694 * returns 0 if we may process the packet,
1695 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1696static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1697{
1698 DEFINE_WAIT(wait);
1699 unsigned int p_seq;
1700 long timeout;
1701 int ret = 0;
1702 spin_lock(&mdev->peer_seq_lock);
1703 for (;;) {
1704 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001705 if (!seq_greater(packet_seq, mdev->peer_seq + 1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706 break;
1707 if (signal_pending(current)) {
1708 ret = -ERESTARTSYS;
1709 break;
1710 }
1711 p_seq = mdev->peer_seq;
1712 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001713 timeout = mdev->tconn->net_conf->ping_timeo*HZ/10;
1714 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715 spin_lock(&mdev->peer_seq_lock);
1716 if (timeout == 0 && p_seq == mdev->peer_seq) {
1717 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001718 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719 break;
1720 }
1721 }
1722 finish_wait(&mdev->seq_wait, &wait);
1723 if (mdev->peer_seq+1 == packet_seq)
1724 mdev->peer_seq++;
1725 spin_unlock(&mdev->peer_seq_lock);
1726 return ret;
1727}
1728
Lars Ellenberg688593c2010-11-17 22:25:03 +01001729/* see also bio_flags_to_wire()
1730 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1731 * flags and back. We may replicate to other kernel versions. */
1732static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001733{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001734 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1735 (dpf & DP_FUA ? REQ_FUA : 0) |
1736 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1737 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001738}
1739
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001741static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1742 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743{
1744 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001745 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001746 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001747 int rw = WRITE;
1748 u32 dp_flags;
1749
Philipp Reisnerb411b362009-09-25 16:07:19 -07001750 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001751 spin_lock(&mdev->peer_seq_lock);
1752 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1753 mdev->peer_seq++;
1754 spin_unlock(&mdev->peer_seq_lock);
1755
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001756 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001757 atomic_inc(&mdev->current_epoch->epoch_size);
1758 return drbd_drain_block(mdev, data_size);
1759 }
1760
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001761 /*
1762 * Corresponding put_ldev done either below (on various errors), or in
1763 * drbd_peer_request_endio, if we successfully submit the data at the
1764 * end of this function.
1765 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766
1767 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001768 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1769 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001770 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001771 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772 }
1773
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001774 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001775
Lars Ellenberg688593c2010-11-17 22:25:03 +01001776 dp_flags = be32_to_cpu(p->dp_flags);
1777 rw |= wire_flags_to_bio(mdev, dp_flags);
1778
1779 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001780 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001781
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001783 peer_req->epoch = mdev->current_epoch;
1784 atomic_inc(&peer_req->epoch->epoch_size);
1785 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001786 spin_unlock(&mdev->epoch_lock);
1787
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001789 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001790 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001791 } else {
1792 /* don't get the req_lock yet,
1793 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001794 const int size = peer_req->i.size;
Philipp Reisner25703f82011-02-07 14:35:25 +01001795 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001797 int first;
1798
Philipp Reisner89e58e72011-01-19 13:12:45 +01001799 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800
1801 /* conflict detection and handling:
1802 * 1. wait on the sequence number,
1803 * in case this data packet overtook ACK packets.
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001804 * 2. check for conflicting write requests.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805 *
1806 * Note: for two_primaries, we are protocol C,
1807 * so there cannot be any request that is DONE
1808 * but still on the transfer log.
1809 *
Philipp Reisnerb411b362009-09-25 16:07:19 -07001810 * if no conflicting request is found:
1811 * submit.
1812 *
1813 * if any conflicting request is found
1814 * that has not yet been acked,
1815 * AND I have the "discard concurrent writes" flag:
1816 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1817 *
1818 * if any conflicting request is found:
1819 * block the receiver, waiting on misc_wait
1820 * until no more conflicting requests are there,
1821 * or we get interrupted (disconnect).
1822 *
1823 * we do not just write after local io completion of those
1824 * requests, but only after req is done completely, i.e.
1825 * we wait for the P_DISCARD_ACK to arrive!
1826 *
1827 * then proceed normally, i.e. submit.
1828 */
1829 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1830 goto out_interrupted;
1831
Philipp Reisner87eeee42011-01-19 14:16:30 +01001832 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001833
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001834 /*
1835 * Inserting the peer request into the write_requests tree will
1836 * prevent new conflicting local requests from being added.
1837 */
1838 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
1839
Philipp Reisnerb411b362009-09-25 16:07:19 -07001840 first = 1;
1841 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001842 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843 int have_unacked = 0;
1844 int have_conflict = 0;
1845 prepare_to_wait(&mdev->misc_wait, &wait,
1846 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001847
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001848 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1849 struct drbd_request *req2;
1850
1851 if (i == &peer_req->i || !i->local)
1852 continue;
1853
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001854 /* only ALERT on first iteration,
1855 * we may be woken up early... */
1856 if (first)
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001857 dev_alert(DEV, "%s[%u] Concurrent local write detected!"
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001858 " new: %llus +%u; pending: %llus +%u\n",
1859 current->comm, current->pid,
1860 (unsigned long long)sector, size,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001861 (unsigned long long)i->sector, i->size);
1862
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001863 req2 = container_of(i, struct drbd_request, i);
1864 if (req2->rq_state & RQ_NET_PENDING)
1865 ++have_unacked;
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001866 ++have_conflict;
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001867 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001868 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001869 if (!have_conflict)
1870 break;
1871
1872 /* Discard Ack only for the _first_ iteration */
1873 if (first && discard && have_unacked) {
1874 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1875 (unsigned long long)sector);
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001876 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001877 inc_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001878 peer_req->w.cb = e_send_discard_ack;
1879 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880
Philipp Reisner87eeee42011-01-19 14:16:30 +01001881 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001882
1883 /* we could probably send that P_DISCARD_ACK ourselves,
1884 * but I don't like the receiver using the msock */
1885
1886 put_ldev(mdev);
Philipp Reisner0625ac12011-02-07 14:49:19 +01001887 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001889 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001890 }
1891
1892 if (signal_pending(current)) {
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001893 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001894 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001895 finish_wait(&mdev->misc_wait, &wait);
1896 goto out_interrupted;
1897 }
1898
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001899 /* Indicate to wake up mdev->misc_wait upon completion. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001900 i->waiting = true;
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001901
Philipp Reisner87eeee42011-01-19 14:16:30 +01001902 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001903 if (first) {
1904 first = 0;
1905 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1906 "sec=%llus\n", (unsigned long long)sector);
1907 } else if (discard) {
1908 /* we had none on the first iteration.
1909 * there must be none now. */
1910 D_ASSERT(have_unacked == 0);
1911 }
Andreas Gruenbacher206d3582011-02-26 23:19:15 +01001912 /* FIXME: Introduce a timeout here after which we disconnect. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001913 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001914 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915 }
1916 finish_wait(&mdev->misc_wait, &wait);
1917 }
1918
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001919 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001920 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001921
Philipp Reisner89e58e72011-01-19 13:12:45 +01001922 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001923 case DRBD_PROT_C:
1924 inc_unacked(mdev);
1925 /* corresponding dec_unacked() in e_end_block()
1926 * respective _drbd_clear_done_ee */
1927 break;
1928 case DRBD_PROT_B:
1929 /* I really don't like it that the receiver thread
1930 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001931 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001932 break;
1933 case DRBD_PROT_A:
1934 /* nothing to do */
1935 break;
1936 }
1937
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001938 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001939 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001940 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1941 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1942 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1943 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944 }
1945
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001946 if (drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001947 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001948
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001949 /* don't care for the reason here */
1950 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001951 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001952 list_del(&peer_req->w.list);
1953 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001954 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001955 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1956 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001957
Philipp Reisnerb411b362009-09-25 16:07:19 -07001958out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001959 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001961 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001962 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963}
1964
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001965/* We may throttle resync, if the lower device seems to be busy,
1966 * and current sync rate is above c_min_rate.
1967 *
1968 * To decide whether or not the lower device is busy, we use a scheme similar
1969 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
1970 * (more than 64 sectors) of activity we cannot account for with our own resync
1971 * activity, it obviously is "busy".
1972 *
1973 * The current sync rate used here uses only the most recent two step marks,
1974 * to have a short time average so we can react faster.
1975 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001976int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001977{
1978 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1979 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001980 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001981 int curr_events;
1982 int throttle = 0;
1983
1984 /* feature disabled? */
1985 if (mdev->sync_conf.c_min_rate == 0)
1986 return 0;
1987
Philipp Reisnere3555d82010-11-07 15:56:29 +01001988 spin_lock_irq(&mdev->al_lock);
1989 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1990 if (tmp) {
1991 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1992 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1993 spin_unlock_irq(&mdev->al_lock);
1994 return 0;
1995 }
1996 /* Do not slow down if app IO is already waiting for this extent */
1997 }
1998 spin_unlock_irq(&mdev->al_lock);
1999
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002000 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2001 (int)part_stat_read(&disk->part0, sectors[1]) -
2002 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002003
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002004 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2005 unsigned long rs_left;
2006 int i;
2007
2008 mdev->rs_last_events = curr_events;
2009
2010 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2011 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002012 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2013
2014 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2015 rs_left = mdev->ov_left;
2016 else
2017 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002018
2019 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2020 if (!dt)
2021 dt++;
2022 db = mdev->rs_mark_left[i] - rs_left;
2023 dbdt = Bit2KB(db/dt);
2024
2025 if (dbdt > mdev->sync_conf.c_min_rate)
2026 throttle = 1;
2027 }
2028 return throttle;
2029}
2030
2031
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002032static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2033 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002034{
2035 sector_t sector;
2036 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002037 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002038 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002039 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002041 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042
2043 sector = be64_to_cpu(p->sector);
2044 size = be32_to_cpu(p->blksize);
2045
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002046 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002047 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2048 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002049 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002050 }
2051 if (sector + (size>>9) > capacity) {
2052 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2053 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002054 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002055 }
2056
2057 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002058 verb = 1;
2059 switch (cmd) {
2060 case P_DATA_REQUEST:
2061 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2062 break;
2063 case P_RS_DATA_REQUEST:
2064 case P_CSUM_RS_REQUEST:
2065 case P_OV_REQUEST:
2066 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2067 break;
2068 case P_OV_REPLY:
2069 verb = 0;
2070 dec_rs_pending(mdev);
2071 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2072 break;
2073 default:
2074 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2075 cmdname(cmd));
2076 }
2077 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078 dev_err(DEV, "Can not satisfy peer's read request, "
2079 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002080
Lars Ellenberga821cc42010-09-06 12:31:37 +02002081 /* drain possibly payload */
2082 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002083 }
2084
2085 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2086 * "criss-cross" setup, that might cause write-out on some other DRBD,
2087 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002088 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2089 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002090 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002091 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002092 }
2093
Philipp Reisner02918be2010-08-20 14:35:10 +02002094 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002095 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002096 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002097 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002098 /* application IO, don't drbd_rs_begin_io */
2099 goto submit;
2100
Philipp Reisnerb411b362009-09-25 16:07:19 -07002101 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002102 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002103 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002104 /* used in the sector offset progress display */
2105 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002106 break;
2107
2108 case P_OV_REPLY:
2109 case P_CSUM_RS_REQUEST:
2110 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002111 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2112 if (!di)
2113 goto out_free_e;
2114
2115 di->digest_size = digest_size;
2116 di->digest = (((char *)di)+sizeof(struct digest_info));
2117
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002118 peer_req->digest = di;
2119 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002120
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002121 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 goto out_free_e;
2123
Philipp Reisner02918be2010-08-20 14:35:10 +02002124 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002125 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002126 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002127 /* used in the sector offset progress display */
2128 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002129 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002130 /* track progress, we may need to throttle */
2131 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002132 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002133 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002134 /* drbd_rs_begin_io done when we sent this request,
2135 * but accounting still needs to be done. */
2136 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002137 }
2138 break;
2139
2140 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002141 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002142 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002143 unsigned long now = jiffies;
2144 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 mdev->ov_start_sector = sector;
2146 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002147 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2148 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002149 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2150 mdev->rs_mark_left[i] = mdev->ov_left;
2151 mdev->rs_mark_time[i] = now;
2152 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153 dev_info(DEV, "Online Verify start sector: %llu\n",
2154 (unsigned long long)sector);
2155 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002156 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002157 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002158 break;
2159
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160 default:
2161 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002162 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002163 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002164 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002165 }
2166
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002167 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2168 * wrt the receiver, but it is not as straightforward as it may seem.
2169 * Various places in the resync start and stop logic assume resync
2170 * requests are processed in order, requeuing this on the worker thread
2171 * introduces a bunch of new code for synchronization between threads.
2172 *
2173 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2174 * "forever", throttling after drbd_rs_begin_io will lock that extent
2175 * for application writes for the same time. For now, just throttle
2176 * here, where the rest of the code expects the receiver to sleep for
2177 * a while, anyways.
2178 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002179
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002180 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2181 * this defers syncer requests for some time, before letting at least
2182 * on request through. The resync controller on the receiving side
2183 * will adapt to the incoming rate accordingly.
2184 *
2185 * We cannot throttle here if remote is Primary/SyncTarget:
2186 * we would also throttle its application reads.
2187 * In that case, throttling is done on the SyncTarget only.
2188 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002189 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2190 schedule_timeout_uninterruptible(HZ/10);
2191 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002192 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002194submit_for_resync:
2195 atomic_add(size >> 9, &mdev->rs_sect_ev);
2196
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002197submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002199 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002200 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002201 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002203 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002204 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002206 /* don't care for the reason here */
2207 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002208 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002209 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002210 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002211 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2212
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002214 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002215 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002216 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002217}
2218
/*
 * drbd_asb_recover_0p() - apply the configured after_sb_0p strategy.
 *
 * Returns -100 if no decision was reached, otherwise 1 or -1.  The caller
 * drbd_sync_handshake() treats a positive result as "sync from this node"
 * and a negative one as "sync from peer".
 *
 * Inputs used for the decision:
 *   self/peer       lowest bit of the local / peer UI_BITMAP UUID
 *   ch_self/ch_peer mdev->comm_bm_set / the peer's UI_SIZE slot; judging by
 *                   the strategy names these are "amount of changes"
 *                   counters -- TODO confirm against where they are filled.
 */
static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
{
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;

	self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
	peer = mdev->p_uuid[UI_BITMAP] & 1;

	ch_peer = mdev->p_uuid[UI_SIZE];
	ch_self = mdev->comm_bm_set;

	switch (mdev->tconn->net_conf->after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
		/* these strategies are rejected here; rv stays -100 */
		dev_err(DEV, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		/* Only reachable with self != peer when entered directly;
		 * when falling through from above, self == peer and neither
		 * test below can match. */
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: the DISCARD_CONCURRENT flag picks the side */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* Both sides have changes: give up if zero-changes was the
		 * configured strategy, continue only when we fell through
		 * from the younger/older primary cases. */
		if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* fall through */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
2290
2291static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2292{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002293 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002294
Philipp Reisner89e58e72011-01-19 13:12:45 +01002295 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002296 case ASB_DISCARD_YOUNGER_PRI:
2297 case ASB_DISCARD_OLDER_PRI:
2298 case ASB_DISCARD_LEAST_CHG:
2299 case ASB_DISCARD_LOCAL:
2300 case ASB_DISCARD_REMOTE:
2301 dev_err(DEV, "Configuration error.\n");
2302 break;
2303 case ASB_DISCONNECT:
2304 break;
2305 case ASB_CONSENSUS:
2306 hg = drbd_asb_recover_0p(mdev);
2307 if (hg == -1 && mdev->state.role == R_SECONDARY)
2308 rv = hg;
2309 if (hg == 1 && mdev->state.role == R_PRIMARY)
2310 rv = hg;
2311 break;
2312 case ASB_VIOLENTLY:
2313 rv = drbd_asb_recover_0p(mdev);
2314 break;
2315 case ASB_DISCARD_SECONDARY:
2316 return mdev->state.role == R_PRIMARY ? 1 : -1;
2317 case ASB_CALL_HELPER:
2318 hg = drbd_asb_recover_0p(mdev);
2319 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002320 enum drbd_state_rv rv2;
2321
2322 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002323 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2324 * we might be here in C_WF_REPORT_PARAMS which is transient.
2325 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002326 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2327 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002328 drbd_khelper(mdev, "pri-lost-after-sb");
2329 } else {
2330 dev_warn(DEV, "Successfully gave up primary role.\n");
2331 rv = hg;
2332 }
2333 } else
2334 rv = hg;
2335 }
2336
2337 return rv;
2338}
2339
2340static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2341{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002342 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002343
Philipp Reisner89e58e72011-01-19 13:12:45 +01002344 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002345 case ASB_DISCARD_YOUNGER_PRI:
2346 case ASB_DISCARD_OLDER_PRI:
2347 case ASB_DISCARD_LEAST_CHG:
2348 case ASB_DISCARD_LOCAL:
2349 case ASB_DISCARD_REMOTE:
2350 case ASB_CONSENSUS:
2351 case ASB_DISCARD_SECONDARY:
2352 dev_err(DEV, "Configuration error.\n");
2353 break;
2354 case ASB_VIOLENTLY:
2355 rv = drbd_asb_recover_0p(mdev);
2356 break;
2357 case ASB_DISCONNECT:
2358 break;
2359 case ASB_CALL_HELPER:
2360 hg = drbd_asb_recover_0p(mdev);
2361 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002362 enum drbd_state_rv rv2;
2363
Philipp Reisnerb411b362009-09-25 16:07:19 -07002364 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2365 * we might be here in C_WF_REPORT_PARAMS which is transient.
2366 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002367 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2368 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002369 drbd_khelper(mdev, "pri-lost-after-sb");
2370 } else {
2371 dev_warn(DEV, "Successfully gave up primary role.\n");
2372 rv = hg;
2373 }
2374 } else
2375 rv = hg;
2376 }
2377
2378 return rv;
2379}
2380
2381static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2382 u64 bits, u64 flags)
2383{
2384 if (!uuid) {
2385 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2386 return;
2387 }
2388 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2389 text,
2390 (unsigned long long)uuid[UI_CURRENT],
2391 (unsigned long long)uuid[UI_BITMAP],
2392 (unsigned long long)uuid[UI_HISTORY_START],
2393 (unsigned long long)uuid[UI_HISTORY_END],
2394 (unsigned long long)bits,
2395 (unsigned long long)flags);
2396}
2397
/*
 * drbd_uuid_compare() - compare the local UUID set with the peer's.
 * @mdev:    device; reads mdev->ldev->md.uuid[] and mdev->p_uuid[]
 * @rule_nr: out parameter, set to the number of the rule that was being
 *           evaluated when the function returned (the caller logs it)
 *
 * All comparisons ignore the lowest bit of each UUID.  Some rules repair the
 * local or the cached peer UUIDs as a side effect before returning.
 *
 * Return values:
 *   100	after split brain try auto recover
 *     2	C_SYNC_SOURCE set BitMap
 *     1	C_SYNC_SOURCE use BitMap
 *     0	no Sync
 *    -1	C_SYNC_TARGET use BitMap
 *    -2	C_SYNC_TARGET set BitMap
 *  -100	after split brain, disconnect
 * -1000	unrelated data
 * -1091	requires proto 91
 * -1096	requires proto 96
 */
static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);

	/* rule 10: both sides freshly created */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* rule 20: only we are fresh (or zero) */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* rule 30: only the peer is fresh (or zero) */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	/* rules 34-40: current UUIDs are equal */
	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* peer has no bitmap UUID, but we still have one */
		if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_set_bm(mdev, 0UL);

				drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
					       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
				*rule_nr = 34;
			} else {
				dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* mirror case: we have no bitmap UUID, but the peer has one */
		if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				/* rotate the cached peer UUIDs: bitmap -> history */
				mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
				mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
				mdev->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
			(mdev->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
			return dc ? -1 : 1;
		}
	}

	/* rule 50: our current UUID equals the peer's bitmap UUID */
	*rule_nr = 50;
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* rule 51: our current UUID equals the peer's newest history UUID */
	*rule_nr = 51;
	peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		/* the confirming test depends on the agreed protocol version */
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
			mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];

			dev_info(DEV, "Did not got last syncUUID packet, corrected:\n");
			drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* rule 60: our current UUID appears anywhere in the peer's history */
	*rule_nr = 60;
	self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = mdev->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* rule 70: our bitmap UUID equals the peer's current UUID */
	*rule_nr = 70;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* rule 71: our newest history UUID equals the peer's current UUID */
	*rule_nr = 71;
	self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		/* the confirming test depends on the agreed protocol version */
		if (mdev->tconn->agreed_pro_version < 96 ?
		    (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (mdev->tconn->agreed_pro_version < 91)
				return -1091;

			_drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
			_drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);

			dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
				       mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);

			return 1;
		}
	}


	/* rule 80: the peer's current UUID appears anywhere in our history */
	*rule_nr = 80;
	peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* rule 90: both bitmap UUIDs are equal and non-zero */
	*rule_nr = 90;
	self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* rule 100: any history UUID of ours matches any of the peer's */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = mdev->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = mdev->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* nothing in common at all */
	return -1000;
}
2589
2590/* drbd_sync_handshake() returns the new conn state on success, or
2591 CONN_MASK (-1) on failure.
2592 */
2593static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2594 enum drbd_disk_state peer_disk) __must_hold(local)
2595{
2596 int hg, rule_nr;
2597 enum drbd_conns rv = C_MASK;
2598 enum drbd_disk_state mydisk;
2599
2600 mydisk = mdev->state.disk;
2601 if (mydisk == D_NEGOTIATING)
2602 mydisk = mdev->new_state_tmp.disk;
2603
2604 dev_info(DEV, "drbd_sync_handshake:\n");
2605 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2606 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2607 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2608
2609 hg = drbd_uuid_compare(mdev, &rule_nr);
2610
2611 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2612
2613 if (hg == -1000) {
2614 dev_alert(DEV, "Unrelated data, aborting!\n");
2615 return C_MASK;
2616 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002617 if (hg < -1000) {
2618 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002619 return C_MASK;
2620 }
2621
2622 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2623 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2624 int f = (hg == -100) || abs(hg) == 2;
2625 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2626 if (f)
2627 hg = hg*2;
2628 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2629 hg > 0 ? "source" : "target");
2630 }
2631
Adam Gandelman3a11a482010-04-08 16:48:23 -07002632 if (abs(hg) == 100)
2633 drbd_khelper(mdev, "initial-split-brain");
2634
Philipp Reisner89e58e72011-01-19 13:12:45 +01002635 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002636 int pcount = (mdev->state.role == R_PRIMARY)
2637 + (peer_role == R_PRIMARY);
2638 int forced = (hg == -100);
2639
2640 switch (pcount) {
2641 case 0:
2642 hg = drbd_asb_recover_0p(mdev);
2643 break;
2644 case 1:
2645 hg = drbd_asb_recover_1p(mdev);
2646 break;
2647 case 2:
2648 hg = drbd_asb_recover_2p(mdev);
2649 break;
2650 }
2651 if (abs(hg) < 100) {
2652 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2653 "automatically solved. Sync from %s node\n",
2654 pcount, (hg < 0) ? "peer" : "this");
2655 if (forced) {
2656 dev_warn(DEV, "Doing a full sync, since"
2657 " UUIDs where ambiguous.\n");
2658 hg = hg*2;
2659 }
2660 }
2661 }
2662
2663 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002664 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002665 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002666 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002667 hg = 1;
2668
2669 if (abs(hg) < 100)
2670 dev_warn(DEV, "Split-Brain detected, manually solved. "
2671 "Sync from %s node\n",
2672 (hg < 0) ? "peer" : "this");
2673 }
2674
2675 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002676 /* FIXME this log message is not correct if we end up here
2677 * after an attempted attach on a diskless node.
2678 * We just refuse to attach -- well, we drop the "connection"
2679 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002680 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002681 drbd_khelper(mdev, "split-brain");
2682 return C_MASK;
2683 }
2684
2685 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2686 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2687 return C_MASK;
2688 }
2689
2690 if (hg < 0 && /* by intention we do not use mydisk here. */
2691 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002692 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002693 case ASB_CALL_HELPER:
2694 drbd_khelper(mdev, "pri-lost");
2695 /* fall through */
2696 case ASB_DISCONNECT:
2697 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2698 return C_MASK;
2699 case ASB_VIOLENTLY:
2700 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
2701 "assumption\n");
2702 }
2703 }
2704
Philipp Reisner89e58e72011-01-19 13:12:45 +01002705 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002706 if (hg == 0)
2707 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2708 else
2709 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
2710 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2711 abs(hg) >= 2 ? "full" : "bit-map based");
2712 return C_MASK;
2713 }
2714
Philipp Reisnerb411b362009-09-25 16:07:19 -07002715 if (abs(hg) >= 2) {
2716 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002717 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2718 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002719 return C_MASK;
2720 }
2721
2722 if (hg > 0) { /* become sync source. */
2723 rv = C_WF_BITMAP_S;
2724 } else if (hg < 0) { /* become sync target */
2725 rv = C_WF_BITMAP_T;
2726 } else {
2727 rv = C_CONNECTED;
2728 if (drbd_bm_total_weight(mdev)) {
2729 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2730 drbd_bm_total_weight(mdev));
2731 }
2732 }
2733
2734 return rv;
2735}
2736
2737/* returns 1 if invalid */
2738static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2739{
2740 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2741 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2742 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2743 return 0;
2744
2745 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2746 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2747 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2748 return 1;
2749
2750 /* everything else is valid if they are equal on both sides. */
2751 if (peer == self)
2752 return 0;
2753
2754 /* everything es is invalid. */
2755 return 1;
2756}
2757
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002758static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2759 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002760{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002761 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002762 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002763 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002764 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2765
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 p_proto = be32_to_cpu(p->protocol);
2767 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2768 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2769 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002771 cf = be32_to_cpu(p->conn_flags);
2772 p_want_lose = cf & CF_WANT_LOSE;
2773
2774 clear_bit(CONN_DRY_RUN, &mdev->flags);
2775
2776 if (cf & CF_DRY_RUN)
2777 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002778
Philipp Reisner89e58e72011-01-19 13:12:45 +01002779 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002780 dev_err(DEV, "incompatible communication protocols\n");
2781 goto disconnect;
2782 }
2783
Philipp Reisner89e58e72011-01-19 13:12:45 +01002784 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002785 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2786 goto disconnect;
2787 }
2788
Philipp Reisner89e58e72011-01-19 13:12:45 +01002789 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2791 goto disconnect;
2792 }
2793
Philipp Reisner89e58e72011-01-19 13:12:45 +01002794 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2796 goto disconnect;
2797 }
2798
Philipp Reisner89e58e72011-01-19 13:12:45 +01002799 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002800 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2801 goto disconnect;
2802 }
2803
Philipp Reisner89e58e72011-01-19 13:12:45 +01002804 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2806 goto disconnect;
2807 }
2808
Philipp Reisner31890f42011-01-19 14:12:51 +01002809 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002810 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002811
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002812 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002813 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002814
2815 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2816 if (strcmp(p_integrity_alg, my_alg)) {
2817 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2818 goto disconnect;
2819 }
2820 dev_info(DEV, "data-integrity-alg: %s\n",
2821 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2822 }
2823
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002824 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002825
2826disconnect:
2827 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002828 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002829}
2830
2831/* helper function
2832 * input: alg name, feature name
2833 * return: NULL (alg name was "")
2834 * ERR_PTR(error) if something goes wrong
2835 * or the crypto hash ptr, if it worked out ok. */
2836struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2837 const char *alg, const char *name)
2838{
2839 struct crypto_hash *tfm;
2840
2841 if (!alg[0])
2842 return NULL;
2843
2844 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2845 if (IS_ERR(tfm)) {
2846 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2847 alg, name, PTR_ERR(tfm));
2848 return tfm;
2849 }
2850 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2851 crypto_free_hash(tfm);
2852 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2853 return ERR_PTR(-EINVAL);
2854 }
2855 return tfm;
2856}
2857
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002858static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2859 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002860{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002861 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002862 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863 unsigned int header_size, data_size, exp_max_sz;
2864 struct crypto_hash *verify_tfm = NULL;
2865 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002866 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002867 int *rs_plan_s = NULL;
2868 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002869
2870 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2871 : apv == 88 ? sizeof(struct p_rs_param)
2872 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002873 : apv <= 94 ? sizeof(struct p_rs_param_89)
2874 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002875
Philipp Reisner02918be2010-08-20 14:35:10 +02002876 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002877 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002878 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002879 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002880 }
2881
2882 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002883 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002884 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002885 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002886 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002887 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002888 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002889 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002890 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002891 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892 D_ASSERT(data_size == 0);
2893 }
2894
2895 /* initialize verify_alg and csums_alg */
2896 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2897
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002898 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002899 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900
2901 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2902
2903 if (apv >= 88) {
2904 if (apv == 88) {
2905 if (data_size > SHARED_SECRET_MAX) {
2906 dev_err(DEV, "verify-alg too long, "
2907 "peer wants %u, accepting only %u byte\n",
2908 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002909 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002910 }
2911
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002912 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002913 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914
2915 /* we expect NUL terminated string */
2916 /* but just in case someone tries to be evil */
2917 D_ASSERT(p->verify_alg[data_size-1] == 0);
2918 p->verify_alg[data_size-1] = 0;
2919
2920 } else /* apv >= 89 */ {
2921 /* we still expect NUL terminated strings */
2922 /* but just in case someone tries to be evil */
2923 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2924 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2925 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2926 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2927 }
2928
2929 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2930 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2931 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2932 mdev->sync_conf.verify_alg, p->verify_alg);
2933 goto disconnect;
2934 }
2935 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2936 p->verify_alg, "verify-alg");
2937 if (IS_ERR(verify_tfm)) {
2938 verify_tfm = NULL;
2939 goto disconnect;
2940 }
2941 }
2942
2943 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2944 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2945 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2946 mdev->sync_conf.csums_alg, p->csums_alg);
2947 goto disconnect;
2948 }
2949 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2950 p->csums_alg, "csums-alg");
2951 if (IS_ERR(csums_tfm)) {
2952 csums_tfm = NULL;
2953 goto disconnect;
2954 }
2955 }
2956
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002957 if (apv > 94) {
2958 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2959 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2960 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2961 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2962 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002963
2964 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2965 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2966 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2967 if (!rs_plan_s) {
2968 dev_err(DEV, "kmalloc of fifo_buffer failed");
2969 goto disconnect;
2970 }
2971 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002972 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002973
2974 spin_lock(&mdev->peer_seq_lock);
2975 /* lock against drbd_nl_syncer_conf() */
2976 if (verify_tfm) {
2977 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2978 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2979 crypto_free_hash(mdev->verify_tfm);
2980 mdev->verify_tfm = verify_tfm;
2981 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2982 }
2983 if (csums_tfm) {
2984 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2985 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2986 crypto_free_hash(mdev->csums_tfm);
2987 mdev->csums_tfm = csums_tfm;
2988 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2989 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002990 if (fifo_size != mdev->rs_plan_s.size) {
2991 kfree(mdev->rs_plan_s.values);
2992 mdev->rs_plan_s.values = rs_plan_s;
2993 mdev->rs_plan_s.size = fifo_size;
2994 mdev->rs_planed = 0;
2995 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002996 spin_unlock(&mdev->peer_seq_lock);
2997 }
2998
2999 return ok;
3000disconnect:
3001 /* just for completeness: actually not needed,
3002 * as this is not reached if csums_tfm was ok. */
3003 crypto_free_hash(csums_tfm);
3004 /* but free the verify_tfm again, if csums_tfm did not work out */
3005 crypto_free_hash(verify_tfm);
3006 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003007 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008}
3009
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010/* warn if the arguments differ by more than 12.5% */
3011static void warn_if_differ_considerably(struct drbd_conf *mdev,
3012 const char *s, sector_t a, sector_t b)
3013{
3014 sector_t d;
3015 if (a == 0 || b == 0)
3016 return;
3017 d = (a > b) ? (a - b) : (b - a);
3018 if (d > (a>>3) || d > (b>>3))
3019 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3020 (unsigned long long)a, (unsigned long long)b);
3021}
3022
/*
 * receive_sizes() - handle the peer's sizes packet.
 *
 * Reads the peer's disk size (d_size), user requested size (u_size),
 * current device size (c_size), dds_flags and max_bio_size from the
 * receive buffer, and adjusts the local view of the device size.
 * cmd and data_size are not evaluated here.
 *
 * Returns false (after forcing C_DISCONNECTING) if neither node has backing
 * storage or if the resulting size would shrink a device with usable data
 * during connect; returns false as well if drbd_determine_dev_size()
 * reports dev_size_error.  Returns true otherwise.
 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003023static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3024 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003026 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003027 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003028 sector_t p_size, p_usize, my_usize;
3029 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003030 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031
Philipp Reisnerb411b362009-09-25 16:07:19 -07003032 p_size = be64_to_cpu(p->d_size);
3033 p_usize = be64_to_cpu(p->u_size);
3034
	/* peer reports size 0 and we have no disk either */
3035 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
3036 dev_err(DEV, "some backing storage is needed\n");
3037 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003038 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003039 }
3040
3041 /* just store the peer's disk size for now.
3042 * we still need to figure out whether we accept that. */
3043 mdev->p_size = p_size;
3044
Philipp Reisnerb411b362009-09-25 16:07:19 -07003045 if (get_ldev(mdev)) {
3046 warn_if_differ_considerably(mdev, "lower level device sizes",
3047 p_size, drbd_get_max_capacity(mdev->ldev));
3048 warn_if_differ_considerably(mdev, "user requested size",
3049 p_usize, mdev->ldev->dc.disk_size);
3050
3051 /* if this is the first connect, or an otherwise expected
3052 * param exchange, choose the minimum */
3053 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3054 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3055 p_usize);
3056
	/* remember our own setting, it is restored below if the
	 * peer's size is refused */
3057 my_usize = mdev->ldev->dc.disk_size;
3058
3059 if (mdev->ldev->dc.disk_size != p_usize) {
3060 mdev->ldev->dc.disk_size = p_usize;
3061 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3062 (unsigned long)mdev->ldev->dc.disk_size);
3063 }
3064
3065 /* Never shrink a device with usable data during connect.
3066 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003067 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068 drbd_get_capacity(mdev->this_bdev) &&
3069 mdev->state.disk >= D_OUTDATED &&
3070 mdev->state.conn < C_CONNECTED) {
3071 dev_err(DEV, "The peer's disk size is too small!\n");
3072 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3073 mdev->ldev->dc.disk_size = my_usize;
3074 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003075 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003076 }
3077 put_ldev(mdev);
3078 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003079
Philipp Reisnere89b5912010-03-24 17:11:33 +01003080 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003082 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 put_ldev(mdev);
3084 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003085 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086 drbd_md_sync(mdev);
3087 } else {
3088 /* I am diskless, need to accept the peer's size. */
3089 drbd_set_my_capacity(mdev, p_size);
3090 }
3091
Philipp Reisner99432fc2011-05-20 16:39:13 +02003092 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3093 drbd_reconsider_max_bio_size(mdev);
3094
	/* detect a size change of the local backing device since the
	 * last recorded known_size */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 if (get_ldev(mdev)) {
3096 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3097 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3098 ldsc = 1;
3099 }
3100
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 put_ldev(mdev);
3102 }
3103
3104 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3105 if (be64_to_cpu(p->c_size) !=
3106 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3107 /* we have different sizes, probably peer
3108 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003109 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110 }
	/* a pending resize, or a device that grew while connected:
	 * resync the new area if both disks are at least inconsistent
	 * (unless DDSF_NO_RESYNC is set), otherwise set RESYNC_AFTER_NEG */
3111 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3112 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3113 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003114 mdev->state.disk >= D_INCONSISTENT) {
3115 if (ddsf & DDSF_NO_RESYNC)
3116 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3117 else
3118 resync_after_online_grow(mdev);
3119 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3121 }
3122 }
3123
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003124 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003125}
3126
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003127static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3128 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003129{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003130 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003132 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003133
Philipp Reisnerb411b362009-09-25 16:07:19 -07003134 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3135
3136 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3137 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3138
3139 kfree(mdev->p_uuid);
3140 mdev->p_uuid = p_uuid;
3141
3142 if (mdev->state.conn < C_CONNECTED &&
3143 mdev->state.disk < D_INCONSISTENT &&
3144 mdev->state.role == R_PRIMARY &&
3145 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3146 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3147 (unsigned long long)mdev->ed_uuid);
3148 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003149 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150 }
3151
3152 if (get_ldev(mdev)) {
3153 int skip_initial_sync =
3154 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003155 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003156 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3157 (p_uuid[UI_FLAGS] & 8);
3158 if (skip_initial_sync) {
3159 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3160 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003161 "clear_n_write from receive_uuids",
3162 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003163 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3164 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3165 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3166 CS_VERBOSE, NULL);
3167 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003168 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 }
3170 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003171 } else if (mdev->state.disk < D_INCONSISTENT &&
3172 mdev->state.role == R_PRIMARY) {
3173 /* I am a diskless primary, the peer just created a new current UUID
3174 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003175 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176 }
3177
3178 /* Before we test for the disk state, we should wait until an eventually
3179 ongoing cluster wide state change is finished. That is important if
3180 we are primary and are detaching from our disk. We need to see the
3181 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003182 mutex_lock(mdev->state_mutex);
3183 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003184 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003185 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3186
3187 if (updated_uuids)
3188 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003189
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003190 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003191}
3192
3193/**
3194 * convert_state() - Converts the peer's view of the cluster state to our point of view
3195 * @ps: The state as seen by the peer.
3196 */
3197static union drbd_state convert_state(union drbd_state ps)
3198{
3199 union drbd_state ms;
3200
3201 static enum drbd_conns c_tab[] = {
3202 [C_CONNECTED] = C_CONNECTED,
3203
3204 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3205 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3206 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3207 [C_VERIFY_S] = C_VERIFY_T,
3208 [C_MASK] = C_MASK,
3209 };
3210
3211 ms.i = ps.i;
3212
3213 ms.conn = c_tab[ps.conn];
3214 ms.peer = ps.role;
3215 ms.role = ps.peer;
3216 ms.pdsk = ps.disk;
3217 ms.disk = ps.pdsk;
3218 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3219
3220 return ms;
3221}
3222
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003223static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3224 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003225{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003226 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003227 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003228 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003229
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230 mask.i = be32_to_cpu(p->mask);
3231 val.i = be32_to_cpu(p->val);
3232
Philipp Reisner25703f82011-02-07 14:35:25 +01003233 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003234 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003236 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003237 }
3238
3239 mask = convert_state(mask);
3240 val = convert_state(val);
3241
Philipp Reisner047cd4a2011-02-15 11:09:33 +01003242 if (cmd == P_CONN_ST_CHG_REQ) {
3243 rv = conn_request_state(mdev->tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY);
3244 conn_send_sr_reply(mdev->tconn, rv);
3245 } else {
3246 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3247 drbd_send_sr_reply(mdev, rv);
3248 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003249
Philipp Reisnerb411b362009-09-25 16:07:19 -07003250 drbd_md_sync(mdev);
3251
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003252 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003253}
3254
/*
 * receive_state() - handle a state packet sent by the peer.
 *
 * Decodes the peer's state word from the receive buffer, derives the new
 * local state from it (connection state, peer role, peer disk state and
 * peer_isp) and applies it with _drbd_set_state() while holding
 * tconn->req_lock.  drbd_sync_handshake() is consulted when one of the
 * "consider resync" conditions below holds.
 * cmd and data_size are not evaluated here.
 *
 * Returns true when the packet was processed (including the early exit
 * after a possible drbd_resync_finished()), false on the failure paths:
 * a dry-run connect, a failed sync handshake, a peer that is only
 * Consistent while IO is suspended, or a refused state change.
 */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003255static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3256 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003258 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003259 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003261 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 int rv;
3263
Philipp Reisnerb411b362009-09-25 16:07:19 -07003264 peer_state.i = be32_to_cpu(p->state);
3265
	/* a peer disk in D_NEGOTIATING is treated as D_INCONSISTENT or
	 * D_CONSISTENT, depending on the value-4 bit of the UUID flags
	 * the peer sent earlier */
3266 real_peer_disk = peer_state.disk;
3267 if (peer_state.disk == D_NEGOTIATING) {
3268 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3269 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3270 }
3271
	/* Take a snapshot of the current state under req_lock.  The retry
	 * label is entered with the lock held, also from the goto below. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003272 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003273 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003274 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003275 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003276
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003277 /* peer says his disk is uptodate, while we think it is inconsistent,
3278 * and this happens while we think we have a sync going on. */
3279 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3280 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3281 /* If we are (becoming) SyncSource, but peer is still in sync
3282 * preparation, ignore its uptodate-ness to avoid flapping, it
3283 * will change to inconsistent once the peer reaches active
3284 * syncing states.
3285 * It may have changed syncer-paused flags, however, so we
3286 * cannot ignore this completely. */
3287 if (peer_state.conn > C_CONNECTED &&
3288 peer_state.conn < C_SYNC_SOURCE)
3289 real_peer_disk = D_INCONSISTENT;
3290
3291 /* if peer_state changes to connected at the same time,
3292 * it explicitly notifies us that it finished resync.
3293 * Maybe we should finish it up, too? */
3294 else if (os.conn >= C_SYNC_SOURCE &&
3295 peer_state.conn == C_CONNECTED) {
3296 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3297 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003298 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003299 }
3300 }
3301
3302 /* peer says his disk is inconsistent, while we think it is uptodate,
3303 * and this happens while the peer still thinks we have a sync going on,
3304 * but we think we are already done with the sync.
3305 * We ignore this to avoid flapping pdsk.
3306 * This should not happen, if the peer is a recent version of drbd. */
3307 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3308 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3309 real_peer_disk = D_UP_TO_DATE;
3310
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003311 if (ns.conn == C_WF_REPORT_PARAMS)
3312 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003313
Philipp Reisner67531712010-10-27 12:21:30 +02003314 if (peer_state.conn == C_AHEAD)
3315 ns.conn = C_BEHIND;
3316
Philipp Reisnerb411b362009-09-25 16:07:19 -07003317 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3318 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3319 int cr; /* consider resync */
3320
3321 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003322 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323 /* if we had an established connection
3324 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003325 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003327 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328 /* if we have both been inconsistent, and the peer has been
3329 * forced to be UpToDate with --overwrite-data */
3330 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3331 /* if we had been plain connected, and the admin requested to
3332 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003333 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003334 (peer_state.conn >= C_STARTING_SYNC_S &&
3335 peer_state.conn <= C_WF_BITMAP_T));
3336
3337 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003338 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003339
3340 put_ldev(mdev);
	/* C_MASK is handled as "no usable connection state" here:
	 * fail the local attach, treat the peer as diskless, or
	 * give up on the connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003341 if (ns.conn == C_MASK) {
3342 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003343 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003344 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003345 } else if (peer_state.disk == D_NEGOTIATING) {
3346 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3347 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003348 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003349 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003350 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003351 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003352 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003353 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003354 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003355 }
3356 }
3357 }
3358
	/* All decisions above were based on the snapshot in os.  If the
	 * device state changed while the lock was dropped, start over. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003359 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003360 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003361 goto retry;
3362 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363 ns.peer = peer_state.role;
3364 ns.pdsk = real_peer_disk;
3365 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003366 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367 ns.disk = mdev->new_state_tmp.disk;
	/* CS_HARD is added unless this is the transition from a not yet
	 * connected state to a connected one */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003368 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3369 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003370 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003371 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003372 for temporal network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003373 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003374 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
3375 tl_clear(mdev);
3376 drbd_uuid_new_current(mdev);
3377 clear_bit(NEW_CUR_UUID, &mdev->flags);
3378 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003379 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003380 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003381 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
	/* from here on ns holds the state that is actually in effect */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003382 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003383 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003384
3385 if (rv < SS_SUCCESS) {
3386 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003387 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003388 }
3389
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003390 if (os.conn > C_WF_REPORT_PARAMS) {
3391 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003392 peer_state.disk != D_NEGOTIATING ) {
3393 /* we want resync, peer has not yet decided to sync... */
3394 /* Nowadays only used when forcing a node into primary role and
3395 setting its disk to UpToDate with that */
3396 drbd_send_uuids(mdev);
3397 drbd_send_state(mdev);
3398 }
3399 }
3400
	/* the want_lose setting is reset once a state packet was
	 * processed successfully */
Philipp Reisner89e58e72011-01-19 13:12:45 +01003401 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003402
3403 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3404
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003405 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003406}
3407
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003408static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3409 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003411 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003412
3413 wait_event(mdev->misc_wait,
3414 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003415 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416 mdev->state.conn < C_CONNECTED ||
3417 mdev->state.disk < D_NEGOTIATING);
3418
3419 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3420
Philipp Reisnerb411b362009-09-25 16:07:19 -07003421 /* Here the _drbd_uuid_ functions are right, current should
3422 _not_ be rotated into the history */
3423 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3424 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3425 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3426
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003427 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003428 drbd_start_resync(mdev, C_SYNC_TARGET);
3429
3430 put_ldev(mdev);
3431 } else
3432 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3433
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003434 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003435}
3436
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003437/**
3438 * receive_bitmap_plain
3439 *
3440 * Return 0 when done, 1 when another iteration is needed, and a negative error
3441 * code upon failure.
3442 */
3443static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003444receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3445 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003446{
3447 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3448 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003449 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003450
Philipp Reisner02918be2010-08-20 14:35:10 +02003451 if (want != data_size) {
3452 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003453 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454 }
3455 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003456 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003457 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003458 if (err != want) {
3459 if (err >= 0)
3460 err = -EIO;
3461 return err;
3462 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003463
3464 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3465
3466 c->word_offset += num_words;
3467 c->bit_offset = c->word_offset * BITS_PER_LONG;
3468 if (c->bit_offset > c->bm_bits)
3469 c->bit_offset = c->bm_bits;
3470
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003471 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003472}
3473
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003474/**
3475 * recv_bm_rle_bits
3476 *
3477 * Return 0 when done, 1 when another iteration is needed, and a negative error
3478 * code upon failure.
3479 */
3480static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481recv_bm_rle_bits(struct drbd_conf *mdev,
3482 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003483 struct bm_xfer_ctx *c,
3484 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485{
3486 struct bitstream bs;
3487 u64 look_ahead;
3488 u64 rl;
3489 u64 tmp;
3490 unsigned long s = c->bit_offset;
3491 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003492 int toggle = DCBP_get_start(p);
3493 int have;
3494 int bits;
3495
3496 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3497
3498 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3499 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003500 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003501
3502 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3503 bits = vli_decode_bits(&rl, look_ahead);
3504 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003505 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003506
3507 if (toggle) {
3508 e = s + rl -1;
3509 if (e >= c->bm_bits) {
3510 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003511 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003512 }
3513 _drbd_bm_set_bits(mdev, s, e);
3514 }
3515
3516 if (have < bits) {
3517 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3518 have, bits, look_ahead,
3519 (unsigned int)(bs.cur.b - p->code),
3520 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003521 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003522 }
3523 look_ahead >>= bits;
3524 have -= bits;
3525
3526 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3527 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003528 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529 look_ahead |= tmp << have;
3530 have += bits;
3531 }
3532
3533 c->bit_offset = s;
3534 bm_xfer_ctx_bit_to_word_offset(c);
3535
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003536 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003537}
3538
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003539/**
3540 * decode_bitmap_c
3541 *
3542 * Return 0 when done, 1 when another iteration is needed, and a negative error
3543 * code upon failure.
3544 */
3545static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546decode_bitmap_c(struct drbd_conf *mdev,
3547 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003548 struct bm_xfer_ctx *c,
3549 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003550{
3551 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003552 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003553
3554 /* other variants had been implemented for evaluation,
3555 * but have been dropped as this one turned out to be "best"
3556 * during all our tests. */
3557
3558 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3559 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003560 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003561}
3562
3563void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3564 const char *direction, struct bm_xfer_ctx *c)
3565{
3566 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003567 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003568 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3569 + c->bm_words * sizeof(long);
3570 unsigned total = c->bytes[0] + c->bytes[1];
3571 unsigned r;
3572
3573 /* total can not be zero. but just in case: */
3574 if (total == 0)
3575 return;
3576
3577 /* don't report if not compressed */
3578 if (total >= plain)
3579 return;
3580
3581 /* total < plain. check for overflow, still */
3582 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3583 : (1000 * total / plain);
3584
3585 if (r > 1000)
3586 r = 1000;
3587
3588 r = 1000 - r;
3589 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3590 "total %u; compression: %u.%u%%\n",
3591 direction,
3592 c->bytes[1], c->packets[1],
3593 c->bytes[0], c->packets[0],
3594 total, r/10, r % 10);
3595}
3596
3597/* Since we are processing the bitfield from lower addresses to higher,
3598 it does not matter if the process it in 32 bit chunks or 64 bit
3599 chunks as long as it is little endian. (Understand it as byte stream,
3600 beginning with the lowest byte...) If we would use big endian
3601 we would need to process it from the highest address to the lowest,
3602 in order to be agnostic to the 32 vs 64 bits issue.
3603
3604 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003605static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3606 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003607{
3608 struct bm_xfer_ctx c;
3609 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003610 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003611 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003612 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003613 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003614
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003615 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3616 /* you are supposed to send additional out-of-sync information
3617 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003618
3619 /* maybe we should use some per thread scratch page,
3620 * and allocate that during initial device creation? */
3621 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3622 if (!buffer) {
3623 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3624 goto out;
3625 }
3626
3627 c = (struct bm_xfer_ctx) {
3628 .bm_bits = drbd_bm_bits(mdev),
3629 .bm_words = drbd_bm_words(mdev),
3630 };
3631
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003632 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003633 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003634 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003635 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003636 /* MAYBE: sanity check that we speak proto >= 90,
3637 * and the feature is enabled! */
3638 struct p_compressed_bm *p;
3639
Philipp Reisner02918be2010-08-20 14:35:10 +02003640 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641 dev_err(DEV, "ReportCBitmap packet too large\n");
3642 goto out;
3643 }
3644 /* use the page buff */
3645 p = buffer;
3646 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003647 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003648 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003649 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3650 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003651 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003652 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003653 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003655 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003656 goto out;
3657 }
3658
Philipp Reisner02918be2010-08-20 14:35:10 +02003659 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003660 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003662 if (err <= 0) {
3663 if (err < 0)
3664 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003665 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003666 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003667 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003669 cmd = pi.cmd;
3670 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003671 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672
3673 INFO_bm_xfer_stats(mdev, "receive", &c);
3674
3675 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003676 enum drbd_state_rv rv;
3677
Philipp Reisnerb411b362009-09-25 16:07:19 -07003678 ok = !drbd_send_bitmap(mdev);
3679 if (!ok)
3680 goto out;
3681 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003682 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3683 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3685 /* admin may have requested C_DISCONNECTING,
3686 * other threads may have noticed network errors */
3687 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3688 drbd_conn_str(mdev->state.conn));
3689 }
3690
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003691 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003692 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003693 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003694 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3695 drbd_start_resync(mdev, C_SYNC_SOURCE);
3696 free_page((unsigned long) buffer);
3697 return ok;
3698}
3699
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003700static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3701 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702{
3703 /* TODO zero copy sink :) */
3704 static char sink[128];
3705 int size, want, r;
3706
Philipp Reisner02918be2010-08-20 14:35:10 +02003707 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3708 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709
Philipp Reisner02918be2010-08-20 14:35:10 +02003710 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003711 while (size > 0) {
3712 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003713 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003714 if (!expect(r > 0))
3715 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716 size -= r;
3717 }
3718 return size == 0;
3719}
3720
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003721static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3722 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003723{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003724 /* Make sure we've acked all the TCP data associated
3725 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003726 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003727
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003728 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003729}
3730
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003731static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3732 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003733{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003734 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003735
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003736 switch (mdev->state.conn) {
3737 case C_WF_SYNC_UUID:
3738 case C_WF_BITMAP_T:
3739 case C_BEHIND:
3740 break;
3741 default:
3742 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3743 drbd_conn_str(mdev->state.conn));
3744 }
3745
Philipp Reisner73a01a12010-10-27 14:33:00 +02003746 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3747
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003748 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003749}
3750
/*
 * Signature of a data packet handler: the device, the packet command and
 * the number of bytes that are still to be received for this packet.
 * A zero return value is treated as failure by the receiver loop.
 */
typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
				  unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003753
Philipp Reisner02918be2010-08-20 14:35:10 +02003754struct data_cmd {
3755 int expect_payload;
3756 size_t pkt_size;
3757 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003758};
3759
Philipp Reisner02918be2010-08-20 14:35:10 +02003760static struct data_cmd drbd_cmd_handler[] = {
3761 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3762 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3763 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3764 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Philipp Reisner257d0af2011-01-26 12:15:29 +01003765 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3766 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3767 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003768 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3769 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003770 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3771 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003772 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3773 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3774 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3775 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3776 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3777 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3778 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3779 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3780 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3781 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003782 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner047cd4a2011-02-15 11:09:33 +01003783 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
Philipp Reisner02918be2010-08-20 14:35:10 +02003784 /* anything missing from this table is in
3785 * the asender_tbl, see get_asender_cmd */
3786 [P_MAX_CMD] = { 0, 0, NULL },
3787};
3788
/* All handler functions that expect a sub-header get that sub-header in
   mdev->tconn->data.rbuf.header.head.payload.

   Usually in mdev->tconn->data.rbuf.header.head the callback can find the usual
   p_header, but they may not rely on that. Since there is also p_header95 !
 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003796static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003797{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003798 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003799 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003800 size_t shs; /* sub header size */
3801 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003803 while (get_t_state(&tconn->receiver) == RUNNING) {
3804 drbd_thread_current_set_cpu(&tconn->receiver);
3805 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003806 goto err_out;
3807
Philipp Reisner77351055b2011-02-07 17:24:26 +01003808 if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003809 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003810 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003811 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003812
Philipp Reisner77351055b2011-02-07 17:24:26 +01003813 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3814 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003815 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003816 goto err_out;
3817 }
3818
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003819 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003820 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003821 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003822 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003823 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003824 goto err_out;
3825 }
3826 }
3827
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003828 rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003829
3830 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003831 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003832 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003833 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836
Philipp Reisner02918be2010-08-20 14:35:10 +02003837 if (0) {
3838 err_out:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003839 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003840 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841}
3842
Philipp Reisnera21e9292011-02-08 15:08:49 +01003843void drbd_flush_workqueue(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844{
3845 struct drbd_wq_barrier barr;
3846
3847 barr.w.cb = w_prev_work_done;
Philipp Reisnera21e9292011-02-08 15:08:49 +01003848 barr.w.mdev = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849 init_completion(&barr.done);
Philipp Reisnera21e9292011-02-08 15:08:49 +01003850 drbd_queue_work(&mdev->tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003851 wait_for_completion(&barr.done);
3852}
3853
Philipp Reisner360cc742011-02-08 14:29:53 +01003854static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003855{
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003856 enum drbd_conns oc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003857 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003858
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003859 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003860 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003861
3862 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01003863 drbd_thread_stop(&tconn->asender);
3864 drbd_free_sock(tconn);
3865
3866 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
3867
3868 conn_info(tconn, "Connection closed\n");
3869
3870 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003871 oc = tconn->cstate;
3872 if (oc >= C_UNCONNECTED)
3873 rv = _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
3874
Philipp Reisner360cc742011-02-08 14:29:53 +01003875 spin_unlock_irq(&tconn->req_lock);
3876
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003877 if (oc == C_DISCONNECTING) {
Philipp Reisner360cc742011-02-08 14:29:53 +01003878 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
3879
3880 crypto_free_hash(tconn->cram_hmac_tfm);
3881 tconn->cram_hmac_tfm = NULL;
3882
3883 kfree(tconn->net_conf);
3884 tconn->net_conf = NULL;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01003885 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE);
Philipp Reisner360cc742011-02-08 14:29:53 +01003886 }
3887}
3888
3889static int drbd_disconnected(int vnr, void *p, void *data)
3890{
3891 struct drbd_conf *mdev = (struct drbd_conf *)p;
3892 enum drbd_fencing_p fp;
3893 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003894
Philipp Reisner85719572010-07-21 10:20:17 +02003895 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003896 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003897 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3898 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3899 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003900 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003901
3902 /* We do not have data structures that would allow us to
3903 * get the rs_pending_cnt down to 0 again.
3904 * * On C_SYNC_TARGET we do not have any data structures describing
3905 * the pending RSDataRequest's we have sent.
3906 * * On C_SYNC_SOURCE there is no data structure that tracks
3907 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3908 * And no, it is not the sum of the reference counts in the
3909 * resync_LRU. The resync_LRU tracks the whole operation including
3910 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3911 * on the fly. */
3912 drbd_rs_cancel_all(mdev);
3913 mdev->rs_total = 0;
3914 mdev->rs_failed = 0;
3915 atomic_set(&mdev->rs_pending_cnt, 0);
3916 wake_up(&mdev->misc_wait);
3917
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003918 del_timer(&mdev->request_timer);
3919
Philipp Reisnerb411b362009-09-25 16:07:19 -07003920 /* make sure syncer is stopped and w_resume_next_sg queued */
3921 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003922 resync_timer_fn((unsigned long)mdev);
3923
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3925 * w_make_resync_request etc. which may still be on the worker queue
3926 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01003927 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003928
3929 /* This also does reclaim_net_ee(). If we do this too early, we might
3930 * miss some resync ee and pages.*/
3931 drbd_process_done_ee(mdev);
3932
3933 kfree(mdev->p_uuid);
3934 mdev->p_uuid = NULL;
3935
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003936 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003937 tl_clear(mdev);
3938
Philipp Reisnerb411b362009-09-25 16:07:19 -07003939 drbd_md_sync(mdev);
3940
3941 fp = FP_DONT_CARE;
3942 if (get_ldev(mdev)) {
3943 fp = mdev->ldev->dc.fencing;
3944 put_ldev(mdev);
3945 }
3946
Philipp Reisner87f7be42010-06-11 13:56:33 +02003947 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3948 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003949
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003950 /* serialize with bitmap writeout triggered by the state change,
3951 * if any. */
3952 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3953
Philipp Reisnerb411b362009-09-25 16:07:19 -07003954 /* tcp_close and release of sendpage pages can be deferred. I don't
3955 * want to use SO_LINGER, because apparently it can be deferred for
3956 * more than 20 seconds (longest time I checked).
3957 *
3958 * Actually we don't care for exactly when the network stack does its
3959 * put_page(), but release our reference on these pages right here.
3960 */
3961 i = drbd_release_ee(mdev, &mdev->net_ee);
3962 if (i)
3963 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003964 i = atomic_read(&mdev->pp_in_use_by_net);
3965 if (i)
3966 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003967 i = atomic_read(&mdev->pp_in_use);
3968 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003969 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003970
3971 D_ASSERT(list_empty(&mdev->read_ee));
3972 D_ASSERT(list_empty(&mdev->active_ee));
3973 D_ASSERT(list_empty(&mdev->sync_ee));
3974 D_ASSERT(list_empty(&mdev->done_ee));
3975
3976 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3977 atomic_set(&mdev->current_epoch->epoch_size, 0);
3978 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01003979
3980 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981}
3982
3983/*
3984 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3985 * we can agree on is stored in agreed_pro_version.
3986 *
3987 * feature flags and the reserved array should be enough room for future
3988 * enhancements of the handshake protocol, and possible plugins...
3989 *
3990 * for now, they are expected to be zero, but ignored.
3991 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003992static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003994 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003995 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003996 int ok;
3997
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003998 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3999 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000 return 0; /* interrupted. not ok. */
4001 }
4002
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004003 if (tconn->data.socket == NULL) {
4004 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005 return 0;
4006 }
4007
4008 memset(p, 0, sizeof(*p));
4009 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4010 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01004011 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
4012 &p->head, sizeof(*p), 0);
4013 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004014 return ok;
4015}
4016
4017/*
4018 * return values:
4019 * 1 yes, we have a valid connection
4020 * 0 oops, did not work out, please try again
4021 * -1 peer talks different language,
4022 * no point in trying again, please go standalone.
4023 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004024static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004026 /* ASSERT current == tconn->receiver ... */
4027 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004028 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004029 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 int rv;
4031
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004032 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004033 if (!rv)
4034 return 0;
4035
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004036 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037 if (!rv)
4038 return 0;
4039
Philipp Reisner77351055b2011-02-07 17:24:26 +01004040 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004041 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004042 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043 return -1;
4044 }
4045
Philipp Reisner77351055b2011-02-07 17:24:26 +01004046 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004047 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004048 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004049 return -1;
4050 }
4051
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004052 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004053
4054 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004055 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004056 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004057 return 0;
4058 }
4059
Philipp Reisnerb411b362009-09-25 16:07:19 -07004060 p->protocol_min = be32_to_cpu(p->protocol_min);
4061 p->protocol_max = be32_to_cpu(p->protocol_max);
4062 if (p->protocol_max == 0)
4063 p->protocol_max = p->protocol_min;
4064
4065 if (PRO_VERSION_MAX < p->protocol_min ||
4066 PRO_VERSION_MIN > p->protocol_max)
4067 goto incompat;
4068
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004069 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004071 conn_info(tconn, "Handshake successful: "
4072 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004073
4074 return 1;
4075
4076 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004077 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004078 "I support %d-%d, peer supports %d-%d\n",
4079 PRO_VERSION_MIN, PRO_VERSION_MAX,
4080 p->protocol_min, p->protocol_max);
4081 return -1;
4082}
4083
4084#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/*
 * Variant of drbd_do_auth() used when the kernel provides no HMAC support.
 *
 * Peer authentication cannot be performed in this configuration, so this
 * always logs the reason and returns -1.
 *
 * The messages are emitted with conn_err(): only the connection (tconn) is
 * available here, there is no per-device context to log against.
 */
static int drbd_do_auth(struct drbd_tconn *tconn)
{
	conn_err(tconn, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4091#else
4092#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004093
4094/* Return value:
4095 1 - auth succeeded,
4096 0 - failed, try again (network error),
4097 -1 - auth failed, don't try again.
4098*/
4099
Philipp Reisner13e60372011-02-08 09:54:40 +01004100static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101{
4102 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4103 struct scatterlist sg;
4104 char *response = NULL;
4105 char *right_response = NULL;
4106 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004107 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108 unsigned int resp_size;
4109 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004110 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111 int rv;
4112
Philipp Reisner13e60372011-02-08 09:54:40 +01004113 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114 desc.flags = 0;
4115
Philipp Reisner13e60372011-02-08 09:54:40 +01004116 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4117 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004119 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004120 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121 goto fail;
4122 }
4123
4124 get_random_bytes(my_challenge, CHALLENGE_LEN);
4125
Philipp Reisner13e60372011-02-08 09:54:40 +01004126 rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004127 if (!rv)
4128 goto fail;
4129
Philipp Reisner13e60372011-02-08 09:54:40 +01004130 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004131 if (!rv)
4132 goto fail;
4133
Philipp Reisner77351055b2011-02-07 17:24:26 +01004134 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004135 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004136 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004137 rv = 0;
4138 goto fail;
4139 }
4140
Philipp Reisner77351055b2011-02-07 17:24:26 +01004141 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004142 conn_err(tconn, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004143 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004144 goto fail;
4145 }
4146
Philipp Reisner77351055b2011-02-07 17:24:26 +01004147 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004148 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004149 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004150 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004151 goto fail;
4152 }
4153
Philipp Reisner13e60372011-02-08 09:54:40 +01004154 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004155
Philipp Reisner77351055b2011-02-07 17:24:26 +01004156 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004157 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004158 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004159 rv = 0;
4160 goto fail;
4161 }
4162
Philipp Reisner13e60372011-02-08 09:54:40 +01004163 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004164 response = kmalloc(resp_size, GFP_NOIO);
4165 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004166 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004167 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004168 goto fail;
4169 }
4170
4171 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004172 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173
4174 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4175 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004176 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004177 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004178 goto fail;
4179 }
4180
Philipp Reisner13e60372011-02-08 09:54:40 +01004181 rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004182 if (!rv)
4183 goto fail;
4184
Philipp Reisner13e60372011-02-08 09:54:40 +01004185 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004186 if (!rv)
4187 goto fail;
4188
Philipp Reisner77351055b2011-02-07 17:24:26 +01004189 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004190 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004191 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004192 rv = 0;
4193 goto fail;
4194 }
4195
Philipp Reisner77351055b2011-02-07 17:24:26 +01004196 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004197 conn_err(tconn, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004198 rv = 0;
4199 goto fail;
4200 }
4201
Philipp Reisner13e60372011-02-08 09:54:40 +01004202 rv = drbd_recv(tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004203
4204 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004205 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004206 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004207 rv = 0;
4208 goto fail;
4209 }
4210
4211 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004212 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004213 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004214 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004215 goto fail;
4216 }
4217
4218 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4219
4220 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4221 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004222 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004223 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004224 goto fail;
4225 }
4226
4227 rv = !memcmp(response, right_response, resp_size);
4228
4229 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004230 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4231 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004232 else
4233 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234
4235 fail:
4236 kfree(peers_ch);
4237 kfree(response);
4238 kfree(right_response);
4239
4240 return rv;
4241}
4242#endif
4243
4244int drbdd_init(struct drbd_thread *thi)
4245{
Philipp Reisner392c8802011-02-09 10:33:31 +01004246 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247 int h;
4248
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004249 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004250
4251 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004252 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004253 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004254 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004255 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256 }
4257 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004258 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004259 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260 }
4261 } while (h == 0);
4262
4263 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004264 if (get_net_conf(tconn)) {
4265 drbdd(tconn);
4266 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267 }
4268 }
4269
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004270 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004271
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004272 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004273 return 0;
4274}
4275
4276/* ********* acknowledge sender ******** */
4277
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004278static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004279{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004280 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004281 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004282
4283 int retcode = be32_to_cpu(p->retcode);
4284
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004285 if (cmd == P_STATE_CHG_REPLY) {
4286 if (retcode >= SS_SUCCESS) {
4287 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4288 } else {
4289 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4290 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4291 drbd_set_st_err_str(retcode), retcode);
4292 }
4293 wake_up(&mdev->state_wait);
4294 } else /* conn == P_CONN_ST_CHG_REPLY */ {
4295 if (retcode >= SS_SUCCESS) {
4296 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4297 } else {
4298 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4299 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4300 drbd_set_st_err_str(retcode), retcode);
4301 }
4302 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303 }
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004304 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004305}
4306
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004307static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004309 return drbd_send_ping_ack(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310
4311}
4312
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004313static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314{
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004315 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004317 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4318 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4319 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004321 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322}
4323
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004324static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004326 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004327 sector_t sector = be64_to_cpu(p->sector);
4328 int blksize = be32_to_cpu(p->blksize);
4329
Philipp Reisner31890f42011-01-19 14:12:51 +01004330 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004331
4332 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4333
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004334 if (get_ldev(mdev)) {
4335 drbd_rs_complete_io(mdev, sector);
4336 drbd_set_in_sync(mdev, sector, blksize);
4337 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4338 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4339 put_ldev(mdev);
4340 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004342 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004343
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004344 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345}
4346
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004347static int
4348validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4349 struct rb_root *root, const char *func,
4350 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351{
4352 struct drbd_request *req;
4353 struct bio_and_error m;
4354
Philipp Reisner87eeee42011-01-19 14:16:30 +01004355 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004356 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004357 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004358 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004359 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 }
4361 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004362 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004363
4364 if (m.bio)
4365 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004366 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367}
4368
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004369static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004370{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004371 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 sector_t sector = be64_to_cpu(p->sector);
4373 int blksize = be32_to_cpu(p->blksize);
4374 enum drbd_req_event what;
4375
4376 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4377
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004378 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004379 drbd_set_in_sync(mdev, sector, blksize);
4380 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004381 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004383 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004384 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004385 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004386 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387 break;
4388 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004389 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004390 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391 break;
4392 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004393 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004394 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004395 break;
4396 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004397 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004398 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004399 break;
4400 default:
4401 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004402 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004403 }
4404
4405 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004406 &mdev->write_requests, __func__,
4407 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004408}
4409
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004410static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004411{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004412 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004413 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004414 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004415 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4416 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004417 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418
4419 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4420
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004421 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422 dec_rs_pending(mdev);
4423 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004424 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004425 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004426
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004427 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004428 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004429 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004430 if (!found) {
4431 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4432 The master bio might already be completed, therefore the
4433 request is no longer in the collision hash. */
4434 /* In Protocol B we might already have got a P_RECV_ACK
4435 but then get a P_NEG_ACK afterwards. */
4436 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004437 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004438 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004439 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004440 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441}
4442
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004443static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004444{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004445 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004446 sector_t sector = be64_to_cpu(p->sector);
4447
4448 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4449 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4450 (unsigned long long)sector, be32_to_cpu(p->blksize));
4451
4452 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004453 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004454 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455}
4456
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004457static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004458{
4459 sector_t sector;
4460 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004461 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004462
4463 sector = be64_to_cpu(p->sector);
4464 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004465
4466 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4467
4468 dec_rs_pending(mdev);
4469
4470 if (get_ldev_if_state(mdev, D_FAILED)) {
4471 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004472 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004473 case P_NEG_RS_DREPLY:
4474 drbd_rs_failed_io(mdev, sector, size);
4475 case P_RS_CANCEL:
4476 break;
4477 default:
4478 D_ASSERT(0);
4479 put_ldev(mdev);
4480 return false;
4481 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004482 put_ldev(mdev);
4483 }
4484
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004485 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004486}
4487
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004488static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004489{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004490 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004491
4492 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4493
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004494 if (mdev->state.conn == C_AHEAD &&
4495 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004496 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4497 mdev->start_resync_timer.expires = jiffies + HZ;
4498 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004499 }
4500
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004501 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502}
4503
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004504static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004505{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004506 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507 struct drbd_work *w;
4508 sector_t sector;
4509 int size;
4510
4511 sector = be64_to_cpu(p->sector);
4512 size = be32_to_cpu(p->blksize);
4513
4514 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4515
4516 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4517 drbd_ov_oos_found(mdev, sector, size);
4518 else
4519 ov_oos_print(mdev);
4520
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004521 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004522 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004523
Philipp Reisnerb411b362009-09-25 16:07:19 -07004524 drbd_rs_complete_io(mdev, sector);
4525 dec_rs_pending(mdev);
4526
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004527 --mdev->ov_left;
4528
4529 /* let's advance progress step marks only for every other megabyte */
4530 if ((mdev->ov_left & 0x200) == 0x200)
4531 drbd_advance_rs_marks(mdev, mdev->ov_left);
4532
4533 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004534 w = kmalloc(sizeof(*w), GFP_NOIO);
4535 if (w) {
4536 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004537 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004538 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004539 } else {
4540 dev_err(DEV, "kmalloc(w) failed.");
4541 ov_oos_print(mdev);
4542 drbd_resync_finished(mdev);
4543 }
4544 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004545 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004546 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547}
4548
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004549static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004550{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004551 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004552}
4553
Philipp Reisnerb411b362009-09-25 16:07:19 -07004554struct asender_cmd {
4555 size_t pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004556 int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004557};
4558
4559static struct asender_cmd *get_asender_cmd(int cmd)
4560{
4561 static struct asender_cmd asender_tbl[] = {
4562 /* anything missing from this table is in
4563 * the drbd_cmd_handler (drbd_default_handler) table,
4564 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004565 [P_PING] = { sizeof(struct p_header), got_Ping },
4566 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004567 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4568 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4569 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4570 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4571 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4572 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4573 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4574 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4575 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4576 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4577 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004578 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004579 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerfc3b10a2011-02-15 11:07:59 +01004580 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_RqSReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004581 [P_MAX_CMD] = { 0, NULL },
4582 };
4583 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4584 return NULL;
4585 return &asender_tbl[cmd];
4586}
4587
/*
 * idr_for_each() callback: run drbd_process_done_ee() for one volume.
 * The result is inverted, so a zero (failure) result from
 * drbd_process_done_ee() becomes the non-zero value that stops the
 * iteration.
 */
static int _drbd_process_done_ee(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = p;

	if (drbd_process_done_ee(mdev))
		return 0;
	return 1;
}
4593
4594static int _check_ee_empty(int vnr, void *p, void *data)
4595{
4596 struct drbd_conf *mdev = (struct drbd_conf *)p;
4597 struct drbd_tconn *tconn = mdev->tconn;
4598 int not_empty;
4599
4600 spin_lock_irq(&tconn->req_lock);
4601 not_empty = !list_empty(&mdev->done_ee);
4602 spin_unlock_irq(&tconn->req_lock);
4603
4604 return not_empty;
4605}
4606
4607static int tconn_process_done_ee(struct drbd_tconn *tconn)
4608{
4609 int not_empty, err;
4610
4611 do {
4612 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4613 flush_signals(current);
4614 err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
4615 if (err)
4616 return err;
4617 set_bit(SIGNAL_ASENDER, &tconn->flags);
4618 not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
4619 } while (not_empty);
4620
4621 return 0;
4622}
4623
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624int drbd_asender(struct drbd_thread *thi)
4625{
Philipp Reisner392c8802011-02-09 10:33:31 +01004626 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004627 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004629 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004630 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004631 void *buf = h;
4632 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004633 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004634 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635
Philipp Reisnerb411b362009-09-25 16:07:19 -07004636 current->policy = SCHED_RR; /* Make this a realtime task! */
4637 current->rt_priority = 2; /* more important than all other tasks */
4638
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004639 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004640 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004641 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004642 if (!drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004643 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004644 goto reconnect;
4645 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004646 tconn->meta.socket->sk->sk_rcvtimeo =
4647 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004648 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004649 }
4650
Philipp Reisner32862ec2011-02-08 16:41:01 +01004651 /* TODO: conditionally cork; it may hurt latency if we cork without
4652 much to send */
4653 if (!tconn->net_conf->no_cork)
4654 drbd_tcp_cork(tconn->meta.socket);
4655 if (tconn_process_done_ee(tconn))
4656 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004658 if (!tconn->net_conf->no_cork)
4659 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004660
4661 /* short circuit, recv_msg would return EINTR anyways. */
4662 if (signal_pending(current))
4663 continue;
4664
Philipp Reisner32862ec2011-02-08 16:41:01 +01004665 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4666 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667
4668 flush_signals(current);
4669
4670 /* Note:
4671 * -EINTR (on meta) we got a signal
4672 * -EAGAIN (on meta) rcvtimeo expired
4673 * -ECONNRESET other side closed the connection
4674 * -ERESTARTSYS (on data) we got a signal
4675 * rv < 0 other than above: unexpected error!
4676 * rv == expected: full header or command
4677 * rv < expected: "woken" by signal during receive
4678 * rv == 0 : "connection shut down by peer"
4679 */
4680 if (likely(rv > 0)) {
4681 received += rv;
4682 buf += rv;
4683 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004684 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685 goto reconnect;
4686 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004687 /* If the data socket received something meanwhile,
4688 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004689 if (time_after(tconn->last_received,
4690 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004691 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004692 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004693 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694 goto reconnect;
4695 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004696 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004697 continue;
4698 } else if (rv == -EINTR) {
4699 continue;
4700 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004701 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702 goto reconnect;
4703 }
4704
4705 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004706 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004707 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004708 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709 if (unlikely(cmd == NULL)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004710 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004711 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004712 goto disconnect;
4713 }
4714 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004715 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004716 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004717 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004718 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004719 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004720 }
4721 if (received == expect) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004722 tconn->last_received = jiffies;
4723 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004724 goto reconnect;
4725
Lars Ellenbergf36af182011-03-09 22:44:55 +01004726 /* the idle_timeout (ping-int)
4727 * has been restored in got_PingAck() */
4728 if (cmd == get_asender_cmd(P_PING_ACK))
4729 ping_timeout_active = 0;
4730
Philipp Reisnerb411b362009-09-25 16:07:19 -07004731 buf = h;
4732 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004733 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004734 cmd = NULL;
4735 }
4736 }
4737
4738 if (0) {
4739reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004740 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741 }
4742 if (0) {
4743disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004744 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004745 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004746 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004747
Philipp Reisner32862ec2011-02-08 16:41:01 +01004748 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004749
4750 return 0;
4751}