/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	int size;
	int vnr;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_handshake(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(int vnr, void *p, void *data);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_conf *, struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */
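
/*
 * For illustration (not part of the original source): a chain handed out by
 * page_chain_del() below looks like
 *
 *	*head -> page -> page -> ... -> page  (page_private == 0 marks the end)
 *
 * where each "->" is the page->private link followed by page_chain_next().
 */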

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *drbd_pp_first_pages_or_try_alloc(struct drbd_conf *mdev, int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_pp_alloc will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_net_ee(struct drbd_conf *mdev, struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that has not finished,
	   we can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_ee_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);
}

/**
 * drbd_pp_alloc() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool retry)
{
	struct page *page = NULL;
	DEFINE_WAIT(wait);

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers)
		page = drbd_pp_first_pages_or_try_alloc(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mdev->tconn->net_conf->max_buffers) {
			page = drbd_pp_first_pages_or_try_alloc(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_pp_alloc interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}
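
/* Typical pairing, for illustration (mirroring drbd_alloc_ee() and
 * drbd_free_some_ee() below): a receiver grabs a chain with
 *	page = drbd_pp_alloc(mdev, nr_pages, gfp_mask & __GFP_WAIT);
 * and eventually gives it back with
 *	drbd_pp_free(mdev, page, is_net);
 * The @retry argument decides whether to sleep until enough pages become
 * available; drbd_pp_free() returns the chain either to the global
 * drbd_pp_pool or to the system, depending on how full the pool already is. */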

/* Must not be used from irq, as that may deadlock: see drbd_pp_alloc.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE)*minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_ee()
 drbd_alloc_ee()
 drbd_init_ee()
 drbd_release_ee()
 drbd_ee_fix_bhs()
 drbd_process_done_ee()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_ee(struct drbd_conf *mdev, u64 id, sector_t sector,
	      unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "alloc_ee: Allocation of an EE failed\n");
		return NULL;
	}

	page = drbd_pp_alloc(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

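/* Teardown counterpart of drbd_alloc_ee() above: drop a possible digest,
 * hand the page chain back via drbd_pp_free() (which adjusts pp_in_use_by_net
 * instead of pp_in_use when @is_net is set), and return the struct itself
 * to drbd_ee_mempool. */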
void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_pp_free(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		drbd_free_some_ee(mdev, peer_req, is_net);
		count++;
	}
	return count;
}


/* See also comments in _req_mod(,BARRIER_ACKED)
 * and receive_Barrier.
 *
 * Move entries from net_ee to done_ee, if ready.
 * Grab done_ee, call all callbacks, free the entries.
 * The callbacks typically send out ACKs.
 */
static int drbd_process_done_ee(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int ok = (mdev->state.conn >= C_WF_REPORT_PARAMS);

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_net_ee(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_ee(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_ack.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		/* list_del not necessary, next/prev members not touched */
		ok = peer_req->w.cb(mdev, &peer_req->w, !ok) && ok;
		drbd_free_ee(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return ok;
}

void _drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

void drbd_wait_ee_list_empty(struct drbd_conf *mdev, struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops  = sock->ops;

out:
	return err;
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		drbd_force_state(tconn->volume0, NS(conn, C_BROKEN_PIPE));

	return rv;
}
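
/* Note on drbd_recv() above: it loops in sock_recvmsg() until the full @size
 * has arrived; on anything short (signal, connection reset, or orderly
 * shutdown by the peer) it usually logs the reason, forces the connection to
 * C_BROKEN_PIPE, and returns whatever sock_recvmsg() last reported. */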

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	int err;
	int disconnect_on_error = 1;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = tconn->net_conf->try_connect_int*HZ;
	drbd_setbufsize(sock, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	memcpy(&src_in6, tconn->net_conf->my_addr,
	       min_t(int, tconn->net_conf->my_addr_len, sizeof(src_in6)));
	if (((struct sockaddr *)tconn->net_conf->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	what = "bind before connect";
	err = sock->ops->bind(sock,
			      (struct sockaddr *) &src_in6,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock,
				 (struct sockaddr *)tconn->net_conf->peer_addr,
				 tconn->net_conf->peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
	}
	put_net_conf(tconn);
	return sock;
}

static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err;
	struct socket *s_estab = NULL, *s_listen;
	const char *what;

	if (!get_net_conf(tconn))
		return NULL;

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)tconn->net_conf->my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = tconn->net_conf->try_connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, tconn->net_conf->sndbuf_size,
			tconn->net_conf->rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen,
			      (struct sockaddr *) tconn->net_conf->my_addr,
			      tconn->net_conf->my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
		}
	}
	put_net_conf(tconn);

	return s_estab;
}

static int drbd_send_fp(struct drbd_tconn *tconn, struct socket *sock, enum drbd_packet cmd)
{
	struct p_header *h = &tconn->data.sbuf.header;

	return _conn_send_cmd(tconn, 0, sock, cmd, h, sizeof(*h), 0);
}

static enum drbd_packet drbd_recv_fp(struct drbd_tconn *tconn, struct socket *sock)
{
	struct p_header80 *h = &tconn->data.rbuf.header.h80;
	int rr;

	rr = drbd_recv_short(sock, h, sizeof(*h), 0);

	if (rr == sizeof(*h) && h->magic == cpu_to_be32(DRBD_MAGIC))
		return be16_to_cpu(h->command);

	return 0xffff;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

static int drbd_connected(int vnr, void *p, void *data)
{
	struct drbd_conf *mdev = (struct drbd_conf *)p;
	int ok = 1;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	ok &= drbd_send_sync_param(mdev, &mdev->sync_conf);
	ok &= drbd_send_sizes(mdev, 0, 0);
	ok &= drbd_send_uuids(mdev);
	ok &= drbd_send_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);

	return !ok;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
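/*
 * In brief (a summary of the code below): both peers keep alternating between
 * drbd_try_connect() and drbd_wait_for_connect().  The first socket that is
 * established carries P_HAND_SHAKE_S and becomes the data socket, the second
 * carries P_HAND_SHAKE_M and becomes the meta socket; if both sides connect
 * simultaneously, drbd_recv_fp() identifies crossed sockets and the redundant
 * one is released.
 */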
static int drbd_connect(struct drbd_tconn *tconn)
{
	struct socket *s, *sock, *msock;
	int try, h, ok;

	if (drbd_request_state(tconn->volume0, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS)
		return -2;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);
	tconn->agreed_pro_version = 99;
	/* agreed_pro_version must be smaller than 100 so we send the old
	   header (h80) in the first packet and in the handshake packet. */

	sock  = NULL;
	msock = NULL;

	do {
		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!sock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_S);
				sock = s;
				s = NULL;
			} else if (!msock) {
				drbd_send_fp(tconn, s, P_HAND_SHAKE_M);
				msock = s;
				s = NULL;
			} else {
				conn_err(tconn, "Logic error in drbd_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock && msock) {
			schedule_timeout_interruptible(tconn->net_conf->ping_timeo*HZ/10);
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = drbd_recv_fp(tconn, s);
			drbd_socket_okay(&sock);
			drbd_socket_okay(&msock);
			switch (try) {
			case P_HAND_SHAKE_S:
				if (sock) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock);
				}
				sock = s;
				break;
			case P_HAND_SHAKE_M:
				if (msock) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock);
				}
				msock = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->volume0->state.conn <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock && msock) {
			ok = drbd_socket_okay(&sock);
			ok = drbd_socket_okay(&msock) && ok;
			if (ok)
				break;
		}
	} while (1);

	msock->sk->sk_reuse = 1; /* SO_REUSEADDR */
	sock->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock->sk->sk_allocation = GFP_NOIO;
	msock->sk->sk_allocation = GFP_NOIO;

	sock->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_HAND_SHAKE timeout,
	 * which we set to 4x the configured ping_timeout. */
	sock->sk->sk_sndtimeo =
	sock->sk->sk_rcvtimeo = tconn->net_conf->ping_timeo*4*HZ/10;

	msock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	msock->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock);
	drbd_tcp_nodelay(msock);

	tconn->data.socket = sock;
	tconn->meta.socket = msock;
	tconn->last_received = jiffies;

	h = drbd_do_handshake(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	if (drbd_request_state(tconn->volume0, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
		return 0;

	sock->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	drbd_thread_start(&tconn->asender);

	if (drbd_send_protocol(tconn) == -1)
		return -1;

	return !idr_for_each(&tconn->volumes, drbd_connected, tconn);

out_release_sockets:
	if (sock)
		sock_release(sock);
	if (msock)
		sock_release(msock);
	return -1;
}

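/* Two header formats are handled below: h80, the original fixed header with a
 * 16-bit length field, and h95 (DRBD_MAGIC_BIG) whose length field is 24 bits
 * wide, hence the & 0x00ffffff.  Which one arrived is told apart by magic. */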
static bool decode_header(struct drbd_tconn *tconn, struct p_header *h, struct packet_info *pi)
{
	if (h->h80.magic == cpu_to_be32(DRBD_MAGIC)) {
		pi->cmd = be16_to_cpu(h->h80.command);
		pi->size = be16_to_cpu(h->h80.length);
		pi->vnr = 0;
	} else if (h->h95.magic == cpu_to_be16(DRBD_MAGIC_BIG)) {
		pi->cmd = be16_to_cpu(h->h95.command);
		pi->size = be32_to_cpu(h->h95.length) & 0x00ffffff;
		pi->vnr = 0;
	} else {
		conn_err(tconn, "magic?? on data m: 0x%08x c: %d l: %d\n",
		    be32_to_cpu(h->h80.magic),
		    be16_to_cpu(h->h80.command),
		    be16_to_cpu(h->h80.length));
		return false;
	}
	return true;
}

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	struct p_header *h = &tconn->data.rbuf.header;
	int r;

	r = drbd_recv(tconn, h, sizeof(*h));
	if (unlikely(r != sizeof(*h))) {
		if (!signal_pending(current))
			conn_warn(tconn, "short read expecting header on sock: r=%d\n", r);
		return false;
	}

	r = decode_header(tconn, h, pi);
	tconn->last_received = jiffies;

	return r;
}

static void drbd_flush(struct drbd_conf *mdev)
{
	int rv;

	if (mdev->write_ordering >= WO_bdev_flush && get_ldev(mdev)) {
		rv = blkdev_issue_flush(mdev->ldev->backing_bdev, GFP_KERNEL,
					NULL);
		if (rv) {
			dev_err(DEV, "local disk flush failed with status %d\n", rv);
			/* would rather check on EOPNOTSUPP, but that is not reliable.
			 * don't try again for ANY return value != 0
			 * if (rv == -EOPNOTSUPP) */
			drbd_bump_write_ordering(mdev, WO_drain_io);
		}
		put_ldev(mdev);
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @mdev:	DRBD device.
 * @epoch:	Epoch object.
 * @ev:		Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_conf *mdev,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&mdev->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&mdev->epoch_lock);
				drbd_send_b_ack(mdev, epoch->barrier_nr, epoch_size);
				spin_lock(&mdev->epoch_lock);
			}
			dec_unacked(mdev);

			if (mdev->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				mdev->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
				wake_up(&mdev->ee_wait);
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&mdev->epoch_lock);

	return rv;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @mdev:	DRBD device.
 * @wo:		Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_conf *mdev, enum write_ordering_e wo) __must_hold(local)
{
	enum write_ordering_e pwo;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = mdev->write_ordering;
	wo = min(pwo, wo);
	if (wo == WO_bdev_flush && mdev->ldev->dc.no_disk_flush)
		wo = WO_drain_io;
	if (wo == WO_drain_io && mdev->ldev->dc.no_disk_drain)
		wo = WO_none;
	mdev->write_ordering = wo;
	if (pwo != mdev->write_ordering || wo == WO_bdev_flush)
		dev_info(DEV, "Method to ensure write ordering: %s\n", write_ordering_str[mdev->write_ordering]);
}

/**
 * drbd_submit_ee()
 * @mdev:	DRBD device.
 * @peer_req:	peer request
 * @rw:		flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 *  single page to an empty bio (which should never happen and likely indicates
 *  that the lower level IO stack is in some way broken). This has been observed
 *  on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_ee(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		   const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* In most cases, we will only need one bio.  But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio. */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_sector = sector;
	bio->bi_bdev = mdev->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_endio_sec;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				dev_err(DEV,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (unsigned long long)bio->bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(page == NULL);
	D_ASSERT(ds == 0);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(mdev, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&mdev->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete.  */
	if (i->waiting)
		wake_up(&mdev->misc_wait);
}

static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packet cmd,
			   unsigned int data_size)
{
	int rv;
	struct p_barrier *p = &mdev->tconn->data.rbuf.barrier;
	struct drbd_epoch *epoch;

	inc_unacked(mdev);

	mdev->current_epoch->barrier_nr = p->barrier;
	rv = drbd_may_finish_epoch(mdev, mdev->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (mdev->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return true;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			dev_warn(DEV, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
		drbd_flush(mdev);

		if (atomic_read(&mdev->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		epoch = mdev->current_epoch;
		wait_event(mdev->ee_wait, atomic_read(&epoch->epoch_size) == 0);

		D_ASSERT(atomic_read(&epoch->active) == 0);
		D_ASSERT(epoch->flags == 0);

		return true;
	default:
		dev_err(DEV, "Strangeness in mdev->write_ordering %d\n", mdev->write_ordering);
		return false;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&mdev->epoch_lock);
	if (atomic_read(&mdev->current_epoch->epoch_size)) {
		list_add(&epoch->list, &mdev->current_epoch->list);
		mdev->current_epoch = epoch;
		mdev->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&mdev->epoch_lock);

	return true;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, rr;
	void *dig_in = mdev->tconn->int_dig_in;
	void *dig_vv = mdev->tconn->int_dig_vv;
	unsigned long *data;

	dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
		crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;

	if (dgs) {
		rr = drbd_recv(mdev->tconn, dig_in, dgs);
		if (rr != dgs) {
			if (!signal_pending(current))
				dev_warn(DEV,
					"short read receiving data digest: read %d expected %d\n",
					rr, dgs);
			return NULL;
		}
	}

	data_size -= dgs;

	if (!expect(data_size != 0))
		return NULL;
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		dev_err(DEV, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_ee(mdev, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		rr = drbd_recv(mdev->tconn, data, len);
		if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
			dev_err(DEV, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (rr != len) {
			drbd_free_ee(mdev, peer_req);
			if (!signal_pending(current))
				dev_warn(DEV, "short read receiving data: read %d expected %d\n",
				rr, len);
			return NULL;
		}
		ds -= rr;
	}

	if (dgs) {
		drbd_csum_ee(mdev, mdev->tconn->integrity_r_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_bcast_ee(mdev, "digest failed",
					dgs, dig_in, dig_vv, peer_req);
			drbd_free_ee(mdev, peer_req);
			return NULL;
		}
	}
	mdev->recv_cnt += data_size>>9;
	return peer_req;
}

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
{
	struct page *page;
	int rr, rv = 1;
	void *data;

	if (!data_size)
		return true;

	page = drbd_pp_alloc(mdev, 1, 1);

	data = kmap(page);
	while (data_size) {
		rr = drbd_recv(mdev->tconn, data, min_t(int, data_size, PAGE_SIZE));
		if (rr != min_t(int, data_size, PAGE_SIZE)) {
1374 rv = 0;
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001375 if (!signal_pending(current))
1376 dev_warn(DEV,
1377 "short read receiving data: read %d expected %d\n",
1378 rr, min_t(int, data_size, PAGE_SIZE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001379 break;
1380 }
1381 data_size -= rr;
1382 }
1383 kunmap(page);
Lars Ellenberg435f0742010-09-06 12:30:25 +02001384 drbd_pp_free(mdev, page, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001385 return rv;
1386}
1387
1388static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1389 sector_t sector, int data_size)
1390{
1391 struct bio_vec *bvec;
1392 struct bio *bio;
1393 int dgs, rr, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001394 void *dig_in = mdev->tconn->int_dig_in;
1395 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001396
Philipp Reisnera0638452011-01-19 14:31:32 +01001397 dgs = (mdev->tconn->agreed_pro_version >= 87 && mdev->tconn->integrity_r_tfm) ?
1398 crypto_hash_digestsize(mdev->tconn->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001399
1400 if (dgs) {
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001401 rr = drbd_recv(mdev->tconn, dig_in, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001402 if (rr != dgs) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001403 if (!signal_pending(current))
1404 dev_warn(DEV,
1405 "short read receiving data reply digest: read %d expected %d\n",
1406 rr, dgs);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001407 return 0;
1408 }
1409 }
1410
1411 data_size -= dgs;
1412
1413 /* optimistically update recv_cnt. if receiving fails below,
1414 * we disconnect anyways, and counters will be reset. */
1415 mdev->recv_cnt += data_size>>9;
1416
1417 bio = req->master_bio;
1418 D_ASSERT(sector == bio->bi_sector);
1419
1420 bio_for_each_segment(bvec, bio, i) {
1421 expect = min_t(int, data_size, bvec->bv_len);
Philipp Reisnerde0ff332011-02-07 16:56:20 +01001422 rr = drbd_recv(mdev->tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001423 kmap(bvec->bv_page)+bvec->bv_offset,
1424 expect);
1425 kunmap(bvec->bv_page);
1426 if (rr != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001427 if (!signal_pending(current))
1428 dev_warn(DEV, "short read receiving data reply: "
1429 "read %d expected %d\n",
1430 rr, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001431 return 0;
1432 }
1433 data_size -= rr;
1434 }
1435
1436 if (dgs) {
Philipp Reisnera0638452011-01-19 14:31:32 +01001437 drbd_csum_bio(mdev, mdev->tconn->integrity_r_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001438 if (memcmp(dig_in, dig_vv, dgs)) {
1439 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
1440 return 0;
1441 }
1442 }
1443
1444 D_ASSERT(data_size == 0);
1445 return 1;
1446}
1447
1448/* e_end_resync_block() is called via
1449 * drbd_process_done_ee() by asender only */
1450static int e_end_resync_block(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1451{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001452 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1453 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001454 int ok;
1455
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001456 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001458 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1459 drbd_set_in_sync(mdev, sector, peer_req->i.size);
1460 ok = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001461 } else {
1462 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001463 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001465 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 }
1467 dec_unacked(mdev);
1468
1469 return ok;
1470}
1471
1472static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1473{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001474 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001476 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1477 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001478 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479
1480 dec_rs_pending(mdev);
1481
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482 inc_unacked(mdev);
1483 /* corresponding dec_unacked() in e_end_resync_block()
1484 * respective _drbd_clear_done_ee */
1485
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001486 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001487
Philipp Reisner87eeee42011-01-19 14:16:30 +01001488 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001489 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001490 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001492 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 if (drbd_submit_ee(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001494 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001496 /* don't care for the reason here */
1497 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001498 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001499 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001500 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001501
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001502 drbd_free_ee(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001503fail:
1504 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001505 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001506}
1507
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001508static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001509find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1510 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001511{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001512 struct drbd_request *req;
1513
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001514 /* Request object according to our peer */
1515 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001516 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001517 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001518 if (!missing_ok) {
1519 dev_err(DEV, "%s: failed to find request %lu, sector %llus\n", func,
1520 (unsigned long)id, (unsigned long long)sector);
1521 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001522 return NULL;
1523}
1524
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001525static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1526 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001527{
1528 struct drbd_request *req;
1529 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001531 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532
1533 sector = be64_to_cpu(p->sector);
1534
Philipp Reisner87eeee42011-01-19 14:16:30 +01001535 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001536 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001537 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001538 if (unlikely(!req))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001539 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001540
Bart Van Assche24c48302011-05-21 18:32:29 +02001541 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 * special casing it there for the various failure cases.
1543 * still no race with drbd_fail_pending_reads */
1544 ok = recv_dless_read(mdev, req, sector, data_size);
1545
1546 if (ok)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001547 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 /* else: nothing. handled from drbd_disconnect...
1549 * I don't think we may complete this just yet
1550 * in case we are "on-disconnect: freeze" */
1551
1552 return ok;
1553}
1554
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001555static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packet cmd,
1556 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557{
1558 sector_t sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559 int ok;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001560 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001561
1562 sector = be64_to_cpu(p->sector);
1563 D_ASSERT(p->block_id == ID_SYNCER);
1564
1565 if (get_ldev(mdev)) {
1566 /* data is submitted to disk within recv_resync_read.
1567 * corresponding put_ldev done below on error,
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001568 * or in drbd_endio_sec. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569 ok = recv_resync_read(mdev, sector, data_size);
1570 } else {
1571 if (__ratelimit(&drbd_ratelimit_state))
1572 dev_err(DEV, "Can not write resync data to local disk.\n");
1573
1574 ok = drbd_drain_block(mdev, data_size);
1575
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001576 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577 }
1578
Philipp Reisner778f2712010-07-06 11:14:00 +02001579 atomic_add(data_size >> 9, &mdev->rs_sect_in);
1580
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581 return ok;
1582}
1583
1584/* e_end_block() is called via drbd_process_done_ee().
1585 * this means this function only runs in the asender thread
1586 */
1587static int e_end_block(struct drbd_conf *mdev, struct drbd_work *w, int cancel)
1588{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001589 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
1590 sector_t sector = peer_req->i.sector;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 int ok = 1, pcmd;
1592
Philipp Reisner89e58e72011-01-19 13:12:45 +01001593 if (mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001594 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1596 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001597 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001598 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001599 ok &= drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001600 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001601 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602 } else {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001603 ok = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604 /* we expect it to be marked out of sync anyways...
1605 * maybe assert this? */
1606 }
1607 dec_unacked(mdev);
1608 }
1609 /* we delete from the conflict detection hash _after_ we sent out the
1610 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001611 if (mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001612 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001613 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1614 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001615 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001616 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001617 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001619 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620
1621 return ok;
1622}
1623
1624static int e_send_discard_ack(struct drbd_conf *mdev, struct drbd_work *w, int unused)
1625{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001626 struct drbd_peer_request *peer_req = (struct drbd_peer_request *)w;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627 int ok = 1;
1628
Philipp Reisner89e58e72011-01-19 13:12:45 +01001629 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001630 ok = drbd_send_ack(mdev, P_DISCARD_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001631
Philipp Reisner87eeee42011-01-19 14:16:30 +01001632 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001633 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1634 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001635 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636
1637 dec_unacked(mdev);
1638
1639 return ok;
1640}
1641
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001642static bool seq_greater(u32 a, u32 b)
1643{
1644 /*
1645 * We assume 32-bit wrap-around here.
1646 * For 24-bit wrap-around, we would have to shift:
1647 * a <<= 8; b <<= 8;
1648 */
1649 return (s32)a - (s32)b > 0;
1650}
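/* For example (worked through from the cast above): with 32-bit wrap-around,
 * seq_greater(5, 0xfffffffb) is true, because (s32)5 - (s32)0xfffffffb
 * evaluates to 10 > 0, even though 5 is the smaller of the two when
 * compared as plain unsigned values. */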
1651
1652static u32 seq_max(u32 a, u32 b)
1653{
1654 return seq_greater(a, b) ? a : b;
1655}
1656
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001657static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001658{
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001659 unsigned int old_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001660
1661 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001662 old_peer_seq = mdev->peer_seq;
1663 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001664 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001665 if (old_peer_seq != peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001666 wake_up(&mdev->seq_wait);
1667}
1668
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669/* Called from receive_Data.
1670 * Synchronize packets on sock with packets on msock.
1671 *
1672 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1673 * packet traveling on msock, they are still processed in the order they have
1674 * been sent.
1675 *
1676 * Note: we don't care for Ack packets overtaking P_DATA packets.
1677 *
1678 * In case packet_seq is larger than mdev->peer_seq number, there are
1679 * outstanding packets on the msock. We wait for them to arrive.
1680 * In case we are the logically next packet, we update mdev->peer_seq
1681 * ourselves. Correctly handles 32bit wrap around.
1682 *
1683 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1684 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1685 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1686 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1687 *
1688 * returns 0 if we may process the packet,
1689 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
1690static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq)
1691{
1692 DEFINE_WAIT(wait);
1693 unsigned int p_seq;
1694 long timeout;
1695 int ret = 0;
1696 spin_lock(&mdev->peer_seq_lock);
1697 for (;;) {
1698 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001699 if (!seq_greater(packet_seq, mdev->peer_seq + 1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700 break;
1701 if (signal_pending(current)) {
1702 ret = -ERESTARTSYS;
1703 break;
1704 }
1705 p_seq = mdev->peer_seq;
1706 spin_unlock(&mdev->peer_seq_lock);
1707 timeout = schedule_timeout(30*HZ);
1708 spin_lock(&mdev->peer_seq_lock);
1709 if (timeout == 0 && p_seq == mdev->peer_seq) {
1710 ret = -ETIMEDOUT;
1711 dev_err(DEV, "ASSERT FAILED waited 30 seconds for sequence update, forcing reconnect\n");
1712 break;
1713 }
1714 }
1715 finish_wait(&mdev->seq_wait, &wait);
1716 if (mdev->peer_seq+1 == packet_seq)
1717 mdev->peer_seq++;
1718 spin_unlock(&mdev->peer_seq_lock);
1719 return ret;
1720}
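/* Worked example (illustrative numbers only): suppose mdev->peer_seq is 7 and
 * a P_DATA arrives carrying packet_seq 9. seq_greater(9, 7 + 1) is true, so we
 * wait: something with sequence 8 is still outstanding on the msock. Once
 * peer_seq has advanced to 8, seq_greater(9, 8 + 1) is false, the loop exits,
 * and the epilogue above bumps peer_seq to 9. */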
1721
Lars Ellenberg688593c2010-11-17 22:25:03 +01001722/* see also bio_flags_to_wire()
1723 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1724 * flags and back. We may replicate to other kernel versions. */
1725static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001726{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001727 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1728 (dpf & DP_FUA ? REQ_FUA : 0) |
1729 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1730 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001731}
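/* For example, a P_DATA packet flagged DP_FUA | DP_FLUSH by the peer is
 * submitted locally as a bio with REQ_FUA | REQ_FLUSH set; DP_DISCARD maps
 * to REQ_DISCARD in the same way. */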
1732
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733/* mirrored write */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01001734static int receive_Data(struct drbd_conf *mdev, enum drbd_packet cmd,
1735 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736{
1737 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001738 struct drbd_peer_request *peer_req;
Philipp Reisnere42325a2011-01-19 13:55:45 +01001739 struct p_data *p = &mdev->tconn->data.rbuf.data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 int rw = WRITE;
1741 u32 dp_flags;
1742
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743 if (!get_ldev(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744 spin_lock(&mdev->peer_seq_lock);
1745 if (mdev->peer_seq+1 == be32_to_cpu(p->seq_num))
1746 mdev->peer_seq++;
1747 spin_unlock(&mdev->peer_seq_lock);
1748
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02001749 drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001750 atomic_inc(&mdev->current_epoch->epoch_size);
1751 return drbd_drain_block(mdev, data_size);
1752 }
1753
1754 /* get_ldev(mdev) successful.
1755 * Corresponding put_ldev done either below (on various errors),
Andreas Gruenbacher9c508422011-01-14 21:19:36 +01001756 * or in drbd_endio_sec, if we successfully submit the data at
Philipp Reisnerb411b362009-09-25 16:07:19 -07001757 * the end of this function. */
1758
1759 sector = be64_to_cpu(p->sector);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001760 peer_req = read_in_block(mdev, p->block_id, sector, data_size);
1761 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001762 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001763 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001764 }
1765
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001766 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001767
Lars Ellenberg688593c2010-11-17 22:25:03 +01001768 dp_flags = be32_to_cpu(p->dp_flags);
1769 rw |= wire_flags_to_bio(mdev, dp_flags);
1770
1771 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001772 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01001773
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774 spin_lock(&mdev->epoch_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001775 peer_req->epoch = mdev->current_epoch;
1776 atomic_inc(&peer_req->epoch->epoch_size);
1777 atomic_inc(&peer_req->epoch->active);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778 spin_unlock(&mdev->epoch_lock);
1779
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780 /* I'm the receiver, I do hold a net_cnt reference. */
Philipp Reisner89e58e72011-01-19 13:12:45 +01001781 if (!mdev->tconn->net_conf->two_primaries) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001782 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783 } else {
1784 /* don't get the req_lock yet,
1785 * we may sleep in drbd_wait_peer_seq */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001786 const int size = peer_req->i.size;
Philipp Reisner25703f82011-02-07 14:35:25 +01001787 const int discard = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789 int first;
1790
Philipp Reisner89e58e72011-01-19 13:12:45 +01001791 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001792
1793 /* conflict detection and handling:
1794 * 1. wait on the sequence number,
1795 * in case this data packet overtook ACK packets.
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001796 * 2. check for conflicting write requests.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001797 *
1798 * Note: for two_primaries, we are protocol C,
1799 * so there cannot be any request that is DONE
1800 * but still on the transfer log.
1801 *
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802 * if no conflicting request is found:
1803 * submit.
1804 *
1805 * if any conflicting request is found
1806 * that has not yet been acked,
1807 * AND I have the "discard concurrent writes" flag:
1808 * queue (via done_ee) the P_DISCARD_ACK; OUT.
1809 *
1810 * if any conflicting request is found:
1811 * block the receiver, waiting on misc_wait
1812 * until no more conflicting requests are there,
1813 * or we get interrupted (disconnect).
1814 *
1815 * we do not just write after local io completion of those
1816 * requests, but only after req is done completely, i.e.
1817 * we wait for the P_DISCARD_ACK to arrive!
1818 *
1819 * then proceed normally, i.e. submit.
1820 */
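		/* Rough sketch of the outcome (assuming, as elsewhere in this
		 * file, that DISCARD_CONCURRENT is held by exactly one side of
		 * the connection): that node answers a concurrent, not-yet-acked
		 * overlap with P_DISCARD_ACK, while the other node blocks here
		 * and then applies the peer's data, so both nodes converge on
		 * the same block content. */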
1821 if (drbd_wait_peer_seq(mdev, be32_to_cpu(p->seq_num)))
1822 goto out_interrupted;
1823
Philipp Reisner87eeee42011-01-19 14:16:30 +01001824 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826 first = 1;
1827 for (;;) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001828 struct drbd_interval *i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001829 int have_unacked = 0;
1830 int have_conflict = 0;
1831 prepare_to_wait(&mdev->misc_wait, &wait,
1832 TASK_INTERRUPTIBLE);
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001833
1834 i = drbd_find_overlap(&mdev->write_requests, sector, size);
1835 if (i) {
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001836 /* only ALERT on first iteration,
1837 * we may be woken up early... */
1838 if (first)
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001839 dev_alert(DEV, "%s[%u] Concurrent %s write detected!"
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001840 " new: %llus +%u; pending: %llus +%u\n",
1841 current->comm, current->pid,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001842 i->local ? "local" : "remote",
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001843 (unsigned long long)sector, size,
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001844 (unsigned long long)i->sector, i->size);
1845
1846 if (i->local) {
1847 struct drbd_request *req2;
1848
1849 req2 = container_of(i, struct drbd_request, i);
1850 if (req2->rq_state & RQ_NET_PENDING)
1851 ++have_unacked;
1852 }
Andreas Gruenbacherde696712011-01-20 15:00:24 +01001853 ++have_conflict;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001854 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 if (!have_conflict)
1856 break;
1857
1858 /* Discard Ack only for the _first_ iteration */
1859 if (first && discard && have_unacked) {
1860 dev_alert(DEV, "Concurrent write! [DISCARD BY FLAG] sec=%llus\n",
1861 (unsigned long long)sector);
1862 inc_unacked(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001863 peer_req->w.cb = e_send_discard_ack;
1864 list_add_tail(&peer_req->w.list, &mdev->done_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865
Philipp Reisner87eeee42011-01-19 14:16:30 +01001866 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001867
1868 /* we could probably send that P_DISCARD_ACK ourselves,
1869 * but I don't like the receiver using the msock */
1870
1871 put_ldev(mdev);
Philipp Reisner0625ac12011-02-07 14:49:19 +01001872 wake_asender(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001874 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001875 }
1876
1877 if (signal_pending(current)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001878 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001879 finish_wait(&mdev->misc_wait, &wait);
1880 goto out_interrupted;
1881 }
1882
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001883 /* Indicate to wake up mdev->misc_wait upon completion. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001884 i->waiting = true;
Andreas Gruenbachera500c2e2011-01-27 14:12:23 +01001885
Philipp Reisner87eeee42011-01-19 14:16:30 +01001886 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001887 if (first) {
1888 first = 0;
1889 dev_alert(DEV, "Concurrent write! [W AFTERWARDS] "
1890 "sec=%llus\n", (unsigned long long)sector);
1891 } else if (discard) {
1892 /* we had none on the first iteration.
1893 * there must be none now. */
1894 D_ASSERT(have_unacked == 0);
1895 }
1896 schedule();
Philipp Reisner87eeee42011-01-19 14:16:30 +01001897 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001898 }
1899 finish_wait(&mdev->misc_wait, &wait);
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001900
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001901 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902 }
1903
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001904 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001905 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906
Philipp Reisner89e58e72011-01-19 13:12:45 +01001907 switch (mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001908 case DRBD_PROT_C:
1909 inc_unacked(mdev);
1910 /* corresponding dec_unacked() in e_end_block()
1911 * respective _drbd_clear_done_ee */
1912 break;
1913 case DRBD_PROT_B:
1914 /* I really don't like it that the receiver thread
1915 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001916 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917 break;
1918 case DRBD_PROT_A:
1919 /* nothing to do */
1920 break;
1921 }
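	/* Protocol recap behind this switch: with protocol C the peer only gets
	 * its ack after the data hit our disk (P_WRITE_ACK, sent from e_end_block);
	 * with protocol B we acknowledge mere receipt right here (P_RECV_ACK);
	 * with protocol A the peer does not wait for any per-request ack from us. */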
1922
Lars Ellenberg6719fb02010-10-18 23:04:07 +02001923 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924 /* In case we have the only disk of the cluster, mark this range out of sync. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001925 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
1926 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
1927 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
1928 drbd_al_begin_io(mdev, peer_req->i.sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929 }
1930
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001931 if (drbd_submit_ee(mdev, peer_req, rw, DRBD_FAULT_DT_WR) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001932 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001933
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001934 /* don't care for the reason here */
1935 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001936 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001937 list_del(&peer_req->w.list);
1938 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001939 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001940 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
1941 drbd_al_complete_io(mdev, peer_req->i.sector);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001942
Philipp Reisnerb411b362009-09-25 16:07:19 -07001943out_interrupted:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001944 drbd_may_finish_epoch(mdev, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001946 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001947 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001948}
1949
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001950/* We may throttle resync, if the lower device seems to be busy,
1951 * and current sync rate is above c_min_rate.
1952 *
1953 * To decide whether or not the lower device is busy, we use a scheme similar
1954 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
1955 * (more than 64 sectors) of activity we cannot account for with our own resync
1956 * activity, it obviously is "busy".
1957 *
1958 * The current sync rate used here uses only the most recent two step marks,
1959 * to have a short time average so we can react faster.
1960 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01001961int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001962{
1963 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
1964 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01001965 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001966 int curr_events;
1967 int throttle = 0;
1968
1969 /* feature disabled? */
1970 if (mdev->sync_conf.c_min_rate == 0)
1971 return 0;
1972
Philipp Reisnere3555d82010-11-07 15:56:29 +01001973 spin_lock_irq(&mdev->al_lock);
1974 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
1975 if (tmp) {
1976 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
1977 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
1978 spin_unlock_irq(&mdev->al_lock);
1979 return 0;
1980 }
1981 /* Do not slow down if app IO is already waiting for this extent */
1982 }
1983 spin_unlock_irq(&mdev->al_lock);
1984
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001985 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
1986 (int)part_stat_read(&disk->part0, sectors[1]) -
1987 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01001988
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001989 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
1990 unsigned long rs_left;
1991 int i;
1992
1993 mdev->rs_last_events = curr_events;
1994
1995 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
1996 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01001997 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
1998
1999 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2000 rs_left = mdev->ov_left;
2001 else
2002 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002003
2004 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2005 if (!dt)
2006 dt++;
2007 db = mdev->rs_mark_left[i] - rs_left;
2008 dbdt = Bit2KB(db/dt);
2009
2010 if (dbdt > mdev->sync_conf.c_min_rate)
2011 throttle = 1;
2012 }
2013 return throttle;
2014}
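/* Numeric sketch (illustrative, assuming Bit2KB() converts 4 KiB bitmap bits
 * to KiB): with the chosen sync mark 6 seconds old and 1500 bits cleared since
 * then, dt = 6, db = 1500, so dbdt = Bit2KB(1500 / 6) = 1000 KiB/s. If
 * c_min_rate is configured to 250, 1000 > 250 and we ask the caller to
 * throttle this resync request. */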
2015
2016
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002017static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packet cmd,
2018 unsigned int digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019{
2020 sector_t sector;
2021 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002022 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002024 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002025 unsigned int fault_type;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002026 struct p_block_req *p = &mdev->tconn->data.rbuf.block_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002027
2028 sector = be64_to_cpu(p->sector);
2029 size = be32_to_cpu(p->blksize);
2030
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01002031 if (size <= 0 || (size & 0x1ff) != 0 || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002032 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2033 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002034 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002035 }
2036 if (sector + (size>>9) > capacity) {
2037 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2038 (unsigned long long)sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002039 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002040 }
2041
2042 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002043 verb = 1;
2044 switch (cmd) {
2045 case P_DATA_REQUEST:
2046 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2047 break;
2048 case P_RS_DATA_REQUEST:
2049 case P_CSUM_RS_REQUEST:
2050 case P_OV_REQUEST:
2051 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2052 break;
2053 case P_OV_REPLY:
2054 verb = 0;
2055 dec_rs_pending(mdev);
2056 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2057 break;
2058 default:
2059 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
2060 cmdname(cmd));
2061 }
2062 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002063 dev_err(DEV, "Can not satisfy peer's read request, "
2064 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002065
Lars Ellenberga821cc42010-09-06 12:31:37 +02002066 /* drain the payload, if any */
2067 return drbd_drain_block(mdev, digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002068 }
2069
2070 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2071 * "criss-cross" setup, that might cause write-out on some other DRBD,
2072 * which in turn might block on the other node at this very place. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002073 peer_req = drbd_alloc_ee(mdev, p->block_id, sector, size, GFP_NOIO);
2074 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002076 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002077 }
2078
Philipp Reisner02918be2010-08-20 14:35:10 +02002079 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002080 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002081 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002082 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002083 /* application IO, don't drbd_rs_begin_io */
2084 goto submit;
2085
Philipp Reisnerb411b362009-09-25 16:07:19 -07002086 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002087 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002088 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002089 /* used in the sector offset progress display */
2090 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091 break;
2092
2093 case P_OV_REPLY:
2094 case P_CSUM_RS_REQUEST:
2095 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002096 di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO);
2097 if (!di)
2098 goto out_free_e;
2099
2100 di->digest_size = digest_size;
2101 di->digest = (((char *)di)+sizeof(struct digest_info));
2102
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002103 peer_req->digest = di;
2104 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002105
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002106 if (drbd_recv(mdev->tconn, di->digest, digest_size) != digest_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002107 goto out_free_e;
2108
Philipp Reisner02918be2010-08-20 14:35:10 +02002109 if (cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002110 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002111 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002112 /* used in the sector offset progress display */
2113 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisner02918be2010-08-20 14:35:10 +02002114 } else if (cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002115 /* track progress, we may need to throttle */
2116 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002117 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002118 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002119 /* drbd_rs_begin_io done when we sent this request,
2120 * but accounting still needs to be done. */
2121 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 }
2123 break;
2124
2125 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002126 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002127 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002128 unsigned long now = jiffies;
2129 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002130 mdev->ov_start_sector = sector;
2131 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002132 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2133 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002134 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2135 mdev->rs_mark_left[i] = mdev->ov_left;
2136 mdev->rs_mark_time[i] = now;
2137 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138 dev_info(DEV, "Online Verify start sector: %llu\n",
2139 (unsigned long long)sector);
2140 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002141 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002142 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002143 break;
2144
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 default:
2146 dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002147 cmdname(cmd));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002148 fault_type = DRBD_FAULT_MAX;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002149 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002150 }
2151
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002152 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2153 * wrt the receiver, but it is not as straightforward as it may seem.
2154 * Various places in the resync start and stop logic assume resync
2155 * requests are processed in order, requeuing this on the worker thread
2156 * introduces a bunch of new code for synchronization between threads.
2157 *
2158 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2159 * "forever", throttling after drbd_rs_begin_io will lock that extent
2160 * for application writes for the same time. For now, just throttle
2161 * here, where the rest of the code expects the receiver to sleep for
2162 * a while, anyways.
2163 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002164
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002165 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2166 * this defers syncer requests for some time, before letting at least
2167 * one request through. The resync controller on the receiving side
2168 * will adapt to the incoming rate accordingly.
2169 *
2170 * We cannot throttle here if remote is Primary/SyncTarget:
2171 * we would also throttle its application reads.
2172 * In that case, throttling is done on the SyncTarget only.
2173 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002174 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2175 schedule_timeout_uninterruptible(HZ/10);
2176 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002177 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002179submit_for_resync:
2180 atomic_add(size >> 9, &mdev->rs_sect_ev);
2181
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002182submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002183 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002184 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002185 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002186 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002188 if (drbd_submit_ee(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002189 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002191 /* don't care for the reason here */
2192 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002193 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002194 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002195 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002196 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2197
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 put_ldev(mdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002200 drbd_free_ee(mdev, peer_req);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002201 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202}
2203
2204static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2205{
2206 int self, peer, rv = -100;
2207 unsigned long ch_self, ch_peer;
2208
2209 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2210 peer = mdev->p_uuid[UI_BITMAP] & 1;
2211
2212 ch_peer = mdev->p_uuid[UI_SIZE];
2213 ch_self = mdev->comm_bm_set;
2214
Philipp Reisner89e58e72011-01-19 13:12:45 +01002215 switch (mdev->tconn->net_conf->after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216 case ASB_CONSENSUS:
2217 case ASB_DISCARD_SECONDARY:
2218 case ASB_CALL_HELPER:
2219 dev_err(DEV, "Configuration error.\n");
2220 break;
2221 case ASB_DISCONNECT:
2222 break;
2223 case ASB_DISCARD_YOUNGER_PRI:
2224 if (self == 0 && peer == 1) {
2225 rv = -1;
2226 break;
2227 }
2228 if (self == 1 && peer == 0) {
2229 rv = 1;
2230 break;
2231 }
2232 /* Else fall through to one of the other strategies... */
2233 case ASB_DISCARD_OLDER_PRI:
2234 if (self == 0 && peer == 1) {
2235 rv = 1;
2236 break;
2237 }
2238 if (self == 1 && peer == 0) {
2239 rv = -1;
2240 break;
2241 }
2242 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002243 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 "Using discard-least-changes instead\n");
2245 case ASB_DISCARD_ZERO_CHG:
2246 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002247 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248 ? -1 : 1;
2249 break;
2250 } else {
2251 if (ch_peer == 0) { rv = 1; break; }
2252 if (ch_self == 0) { rv = -1; break; }
2253 }
Philipp Reisner89e58e72011-01-19 13:12:45 +01002254 if (mdev->tconn->net_conf->after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255 break;
2256 case ASB_DISCARD_LEAST_CHG:
2257 if (ch_self < ch_peer)
2258 rv = -1;
2259 else if (ch_self > ch_peer)
2260 rv = 1;
2261 else /* ( ch_self == ch_peer ) */
2262 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002263 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002264 ? -1 : 1;
2265 break;
2266 case ASB_DISCARD_LOCAL:
2267 rv = -1;
2268 break;
2269 case ASB_DISCARD_REMOTE:
2270 rv = 1;
2271 }
2272
2273 return rv;
2274}
2275
2276static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2277{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002278 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002279
Philipp Reisner89e58e72011-01-19 13:12:45 +01002280 switch (mdev->tconn->net_conf->after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002281 case ASB_DISCARD_YOUNGER_PRI:
2282 case ASB_DISCARD_OLDER_PRI:
2283 case ASB_DISCARD_LEAST_CHG:
2284 case ASB_DISCARD_LOCAL:
2285 case ASB_DISCARD_REMOTE:
2286 dev_err(DEV, "Configuration error.\n");
2287 break;
2288 case ASB_DISCONNECT:
2289 break;
2290 case ASB_CONSENSUS:
2291 hg = drbd_asb_recover_0p(mdev);
2292 if (hg == -1 && mdev->state.role == R_SECONDARY)
2293 rv = hg;
2294 if (hg == 1 && mdev->state.role == R_PRIMARY)
2295 rv = hg;
2296 break;
2297 case ASB_VIOLENTLY:
2298 rv = drbd_asb_recover_0p(mdev);
2299 break;
2300 case ASB_DISCARD_SECONDARY:
2301 return mdev->state.role == R_PRIMARY ? 1 : -1;
2302 case ASB_CALL_HELPER:
2303 hg = drbd_asb_recover_0p(mdev);
2304 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002305 enum drbd_state_rv rv2;
2306
2307 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002308 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2309 * we might be here in C_WF_REPORT_PARAMS which is transient.
2310 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002311 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2312 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002313 drbd_khelper(mdev, "pri-lost-after-sb");
2314 } else {
2315 dev_warn(DEV, "Successfully gave up primary role.\n");
2316 rv = hg;
2317 }
2318 } else
2319 rv = hg;
2320 }
2321
2322 return rv;
2323}
2324
2325static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2326{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002327 int hg, rv = -100;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002328
Philipp Reisner89e58e72011-01-19 13:12:45 +01002329 switch (mdev->tconn->net_conf->after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002330 case ASB_DISCARD_YOUNGER_PRI:
2331 case ASB_DISCARD_OLDER_PRI:
2332 case ASB_DISCARD_LEAST_CHG:
2333 case ASB_DISCARD_LOCAL:
2334 case ASB_DISCARD_REMOTE:
2335 case ASB_CONSENSUS:
2336 case ASB_DISCARD_SECONDARY:
2337 dev_err(DEV, "Configuration error.\n");
2338 break;
2339 case ASB_VIOLENTLY:
2340 rv = drbd_asb_recover_0p(mdev);
2341 break;
2342 case ASB_DISCONNECT:
2343 break;
2344 case ASB_CALL_HELPER:
2345 hg = drbd_asb_recover_0p(mdev);
2346 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002347 enum drbd_state_rv rv2;
2348
Philipp Reisnerb411b362009-09-25 16:07:19 -07002349 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2350 * we might be here in C_WF_REPORT_PARAMS which is transient.
2351 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002352 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2353 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354 drbd_khelper(mdev, "pri-lost-after-sb");
2355 } else {
2356 dev_warn(DEV, "Successfully gave up primary role.\n");
2357 rv = hg;
2358 }
2359 } else
2360 rv = hg;
2361 }
2362
2363 return rv;
2364}
2365
2366static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2367 u64 bits, u64 flags)
2368{
2369 if (!uuid) {
2370 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2371 return;
2372 }
2373 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2374 text,
2375 (unsigned long long)uuid[UI_CURRENT],
2376 (unsigned long long)uuid[UI_BITMAP],
2377 (unsigned long long)uuid[UI_HISTORY_START],
2378 (unsigned long long)uuid[UI_HISTORY_END],
2379 (unsigned long long)bits,
2380 (unsigned long long)flags);
2381}
2382
2383/*
2384 100 after split brain try auto recover
2385 2 C_SYNC_SOURCE set BitMap
2386 1 C_SYNC_SOURCE use BitMap
2387 0 no Sync
2388 -1 C_SYNC_TARGET use BitMap
2389 -2 C_SYNC_TARGET set BitMap
2390 -100 after split brain, disconnect
2391-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002392-1091 requires proto 91
2393-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 */
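/* Two samples of how the table above plays out in the rules below:
 * rule 20 returns -2 (become C_SYNC_TARGET, set the whole bitmap) when our
 * current UUID is just-created/zero but the peer's is established; rule 50
 * returns -1 (become C_SYNC_TARGET, reuse the bitmap) when the peer's bitmap
 * UUID matches our current UUID. */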
2395static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2396{
2397 u64 self, peer;
2398 int i, j;
2399
2400 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2401 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2402
2403 *rule_nr = 10;
2404 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2405 return 0;
2406
2407 *rule_nr = 20;
2408 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2409 peer != UUID_JUST_CREATED)
2410 return -2;
2411
2412 *rule_nr = 30;
2413 if (self != UUID_JUST_CREATED &&
2414 (peer == UUID_JUST_CREATED || peer == (u64)0))
2415 return 2;
2416
2417 if (self == peer) {
2418 int rct, dc; /* roles at crash time */
2419
2420 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2421
Philipp Reisner31890f42011-01-19 14:12:51 +01002422 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002423 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424
2425 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2426 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2427 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2428 drbd_uuid_set_bm(mdev, 0UL);
2429
2430 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2431 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2432 *rule_nr = 34;
2433 } else {
2434 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2435 *rule_nr = 36;
2436 }
2437
2438 return 1;
2439 }
2440
2441 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2442
Philipp Reisner31890f42011-01-19 14:12:51 +01002443 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002444 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002445
2446 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2447 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2448 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2449
2450 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2451 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2452 mdev->p_uuid[UI_BITMAP] = 0UL;
2453
2454 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2455 *rule_nr = 35;
2456 } else {
2457 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2458 *rule_nr = 37;
2459 }
2460
2461 return -1;
2462 }
2463
2464 /* Common power [off|failure] */
2465 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2466 (mdev->p_uuid[UI_FLAGS] & 2);
2467 /* lowest bit is set when we were primary,
2468 * next bit (weight 2) is set when peer was primary */
2469 *rule_nr = 40;
2470
2471 switch (rct) {
2472 case 0: /* !self_pri && !peer_pri */ return 0;
2473 case 1: /* self_pri && !peer_pri */ return 1;
2474 case 2: /* !self_pri && peer_pri */ return -1;
2475 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002476 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002477 return dc ? -1 : 1;
2478 }
2479 }
2480
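	/* At this point the current UUIDs differ and neither side is freshly
	 * created.  The rules below match our UUIDs against the peer's bitmap
	 * and history UUIDs to find out which side kept writing: positive
	 * results make us sync source, negative ones sync target, as listed
	 * in the table above. */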
2481 *rule_nr = 50;
2482 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2483 if (self == peer)
2484 return -1;
2485
2486 *rule_nr = 51;
2487 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2488 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002489 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002490 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2491 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2492 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the
			   modifications the peer made to its UUIDs when it last
			   started a resync as sync source. */
2495
Philipp Reisner31890f42011-01-19 14:12:51 +01002496 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002497 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002498
2499 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2500 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002501
			dev_info(DEV, "Did not get last syncUUID packet, corrected:\n");
2503 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2504
Philipp Reisnerb411b362009-09-25 16:07:19 -07002505 return -1;
2506 }
2507 }
2508
2509 *rule_nr = 60;
2510 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2511 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2512 peer = mdev->p_uuid[i] & ~((u64)1);
2513 if (self == peer)
2514 return -2;
2515 }
2516
2517 *rule_nr = 70;
2518 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2519 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2520 if (self == peer)
2521 return 1;
2522
2523 *rule_nr = 71;
2524 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2525 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002526 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002527 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2528 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2529 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the
			   modifications we made to our own UUIDs when we last
			   started a resync as sync source. */
2532
Philipp Reisner31890f42011-01-19 14:12:51 +01002533 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002534 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002535
2536 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2537 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2538
Philipp Reisner4a23f262011-01-11 17:42:17 +01002539 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002540 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2541 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2542
2543 return 1;
2544 }
2545 }
2546
2547
2548 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002549 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002550 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2551 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2552 if (self == peer)
2553 return 2;
2554 }
2555
2556 *rule_nr = 90;
2557 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2558 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2559 if (self == peer && self != ((u64)0))
2560 return 100;
2561
2562 *rule_nr = 100;
2563 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2564 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2565 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2566 peer = mdev->p_uuid[j] & ~((u64)1);
2567 if (self == peer)
2568 return -100;
2569 }
2570 }
2571
2572 return -1000;
2573}
2574
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK (-1) on failure.
 */
2578static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2579 enum drbd_disk_state peer_disk) __must_hold(local)
2580{
2581 int hg, rule_nr;
2582 enum drbd_conns rv = C_MASK;
2583 enum drbd_disk_state mydisk;
2584
2585 mydisk = mdev->state.disk;
2586 if (mydisk == D_NEGOTIATING)
2587 mydisk = mdev->new_state_tmp.disk;
2588
2589 dev_info(DEV, "drbd_sync_handshake:\n");
2590 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2591 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2592 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2593
2594 hg = drbd_uuid_compare(mdev, &rule_nr);
2595
2596 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2597
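	/* hg encodes the handshake result: hg > 0 means we become sync source,
	 * hg < 0 sync target; |hg| >= 2 forces a full sync, +-100 is a split
	 * brain, -1000 is unrelated data, and values below -1000 mean a newer
	 * protocol version than the one negotiated would be required. */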
2598 if (hg == -1000) {
2599 dev_alert(DEV, "Unrelated data, aborting!\n");
2600 return C_MASK;
2601 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002602 if (hg < -1000) {
2603 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002604 return C_MASK;
2605 }
2606
2607 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2608 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2609 int f = (hg == -100) || abs(hg) == 2;
2610 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2611 if (f)
2612 hg = hg*2;
2613 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2614 hg > 0 ? "source" : "target");
2615 }
2616
Adam Gandelman3a11a482010-04-08 16:48:23 -07002617 if (abs(hg) == 100)
2618 drbd_khelper(mdev, "initial-split-brain");
2619
Philipp Reisner89e58e72011-01-19 13:12:45 +01002620 if (hg == 100 || (hg == -100 && mdev->tconn->net_conf->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002621 int pcount = (mdev->state.role == R_PRIMARY)
2622 + (peer_role == R_PRIMARY);
2623 int forced = (hg == -100);
2624
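		/* Pick the auto-recovery strategy matching the number of nodes
		 * that are currently primary, i.e. the configured
		 * after-sb-0pri / after-sb-1pri / after-sb-2pri policy. */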
2625 switch (pcount) {
2626 case 0:
2627 hg = drbd_asb_recover_0p(mdev);
2628 break;
2629 case 1:
2630 hg = drbd_asb_recover_1p(mdev);
2631 break;
2632 case 2:
2633 hg = drbd_asb_recover_2p(mdev);
2634 break;
2635 }
2636 if (abs(hg) < 100) {
2637 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2638 "automatically solved. Sync from %s node\n",
2639 pcount, (hg < 0) ? "peer" : "this");
2640 if (forced) {
				dev_warn(DEV, "Doing a full sync, since"
					" UUIDs were ambiguous.\n");
2643 hg = hg*2;
2644 }
2645 }
2646 }
2647
2648 if (hg == -100) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002649 if (mdev->tconn->net_conf->want_lose && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002650 hg = -1;
Philipp Reisner89e58e72011-01-19 13:12:45 +01002651 if (!mdev->tconn->net_conf->want_lose && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002652 hg = 1;
2653
2654 if (abs(hg) < 100)
2655 dev_warn(DEV, "Split-Brain detected, manually solved. "
2656 "Sync from %s node\n",
2657 (hg < 0) ? "peer" : "this");
2658 }
2659
2660 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002661 /* FIXME this log message is not correct if we end up here
2662 * after an attempted attach on a diskless node.
2663 * We just refuse to attach -- well, we drop the "connection"
2664 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002665 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002666 drbd_khelper(mdev, "split-brain");
2667 return C_MASK;
2668 }
2669
2670 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2671 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2672 return C_MASK;
2673 }
2674
2675 if (hg < 0 && /* by intention we do not use mydisk here. */
2676 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002677 switch (mdev->tconn->net_conf->rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678 case ASB_CALL_HELPER:
2679 drbd_khelper(mdev, "pri-lost");
2680 /* fall through */
2681 case ASB_DISCONNECT:
2682 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
2683 return C_MASK;
2684 case ASB_VIOLENTLY:
			dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
				" assumption\n");
2687 }
2688 }
2689
Philipp Reisner89e58e72011-01-19 13:12:45 +01002690 if (mdev->tconn->net_conf->dry_run || test_bit(CONN_DRY_RUN, &mdev->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002691 if (hg == 0)
2692 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
2693 else
			dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.\n",
2695 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
2696 abs(hg) >= 2 ? "full" : "bit-map based");
2697 return C_MASK;
2698 }
2699
Philipp Reisnerb411b362009-09-25 16:07:19 -07002700 if (abs(hg) >= 2) {
2701 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01002702 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2703 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002704 return C_MASK;
2705 }
2706
2707 if (hg > 0) { /* become sync source. */
2708 rv = C_WF_BITMAP_S;
2709 } else if (hg < 0) { /* become sync target */
2710 rv = C_WF_BITMAP_T;
2711 } else {
2712 rv = C_CONNECTED;
2713 if (drbd_bm_total_weight(mdev)) {
2714 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
2715 drbd_bm_total_weight(mdev));
2716 }
2717 }
2718
2719 return rv;
2720}
2721
2722/* returns 1 if invalid */
2723static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self)
2724{
2725 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
2726 if ((peer == ASB_DISCARD_REMOTE && self == ASB_DISCARD_LOCAL) ||
2727 (self == ASB_DISCARD_REMOTE && peer == ASB_DISCARD_LOCAL))
2728 return 0;
2729
2730 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
2731 if (peer == ASB_DISCARD_REMOTE || peer == ASB_DISCARD_LOCAL ||
2732 self == ASB_DISCARD_REMOTE || self == ASB_DISCARD_LOCAL)
2733 return 1;
2734
2735 /* everything else is valid if they are equal on both sides. */
2736 if (peer == self)
2737 return 0;
2738
	/* everything else is invalid. */
2740 return 1;
2741}
2742
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002743static int receive_protocol(struct drbd_conf *mdev, enum drbd_packet cmd,
2744 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745{
Philipp Reisnere42325a2011-01-19 13:55:45 +01002746 struct p_protocol *p = &mdev->tconn->data.rbuf.protocol;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002747 int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002748 int p_want_lose, p_two_primaries, cf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002749 char p_integrity_alg[SHARED_SECRET_MAX] = "";
2750
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 p_proto = be32_to_cpu(p->protocol);
2752 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
2753 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
2754 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002755 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002756 cf = be32_to_cpu(p->conn_flags);
2757 p_want_lose = cf & CF_WANT_LOSE;
2758
2759 clear_bit(CONN_DRY_RUN, &mdev->flags);
2760
2761 if (cf & CF_DRY_RUN)
2762 set_bit(CONN_DRY_RUN, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763
Philipp Reisner89e58e72011-01-19 13:12:45 +01002764 if (p_proto != mdev->tconn->net_conf->wire_protocol) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002765 dev_err(DEV, "incompatible communication protocols\n");
2766 goto disconnect;
2767 }
2768
Philipp Reisner89e58e72011-01-19 13:12:45 +01002769 if (cmp_after_sb(p_after_sb_0p, mdev->tconn->net_conf->after_sb_0p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770 dev_err(DEV, "incompatible after-sb-0pri settings\n");
2771 goto disconnect;
2772 }
2773
Philipp Reisner89e58e72011-01-19 13:12:45 +01002774 if (cmp_after_sb(p_after_sb_1p, mdev->tconn->net_conf->after_sb_1p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 dev_err(DEV, "incompatible after-sb-1pri settings\n");
2776 goto disconnect;
2777 }
2778
Philipp Reisner89e58e72011-01-19 13:12:45 +01002779 if (cmp_after_sb(p_after_sb_2p, mdev->tconn->net_conf->after_sb_2p)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002780 dev_err(DEV, "incompatible after-sb-2pri settings\n");
2781 goto disconnect;
2782 }
2783
Philipp Reisner89e58e72011-01-19 13:12:45 +01002784 if (p_want_lose && mdev->tconn->net_conf->want_lose) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002785 dev_err(DEV, "both sides have the 'want_lose' flag set\n");
2786 goto disconnect;
2787 }
2788
Philipp Reisner89e58e72011-01-19 13:12:45 +01002789 if (p_two_primaries != mdev->tconn->net_conf->two_primaries) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 dev_err(DEV, "incompatible setting of the two-primaries options\n");
2791 goto disconnect;
2792 }
2793
Philipp Reisner31890f42011-01-19 14:12:51 +01002794 if (mdev->tconn->agreed_pro_version >= 87) {
Philipp Reisner89e58e72011-01-19 13:12:45 +01002795 unsigned char *my_alg = mdev->tconn->net_conf->integrity_alg;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002797 if (drbd_recv(mdev->tconn, p_integrity_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002798 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002799
2800 p_integrity_alg[SHARED_SECRET_MAX-1] = 0;
2801 if (strcmp(p_integrity_alg, my_alg)) {
2802 dev_err(DEV, "incompatible setting of the data-integrity-alg\n");
2803 goto disconnect;
2804 }
2805 dev_info(DEV, "data-integrity-alg: %s\n",
2806 my_alg[0] ? my_alg : (unsigned char *)"<not-used>");
2807 }
2808
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002809 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810
2811disconnect:
2812 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002813 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002814}
2815
2816/* helper function
2817 * input: alg name, feature name
2818 * return: NULL (alg name was "")
2819 * ERR_PTR(error) if something goes wrong
2820 * or the crypto hash ptr, if it worked out ok. */
2821struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
2822 const char *alg, const char *name)
2823{
2824 struct crypto_hash *tfm;
2825
2826 if (!alg[0])
2827 return NULL;
2828
2829 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
2830 if (IS_ERR(tfm)) {
2831 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
2832 alg, name, PTR_ERR(tfm));
2833 return tfm;
2834 }
2835 if (!drbd_crypto_is_hash(crypto_hash_tfm(tfm))) {
2836 crypto_free_hash(tfm);
2837 dev_err(DEV, "\"%s\" is not a digest (%s)\n", alg, name);
2838 return ERR_PTR(-EINVAL);
2839 }
2840 return tfm;
2841}
2842
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01002843static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packet cmd,
2844 unsigned int packet_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002845{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002846 int ok = true;
Philipp Reisnere42325a2011-01-19 13:55:45 +01002847 struct p_rs_param_95 *p = &mdev->tconn->data.rbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848 unsigned int header_size, data_size, exp_max_sz;
2849 struct crypto_hash *verify_tfm = NULL;
2850 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner31890f42011-01-19 14:12:51 +01002851 const int apv = mdev->tconn->agreed_pro_version;
Philipp Reisner778f2712010-07-06 11:14:00 +02002852 int *rs_plan_s = NULL;
2853 int fifo_size = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002854
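	/* The SyncParam packet grew over the protocol versions: plain
	 * p_rs_param up to apv 87, an appended verify-alg string with apv 88,
	 * p_rs_param_89 (verify-alg and csums-alg) up to apv 94, and
	 * p_rs_param_95 (adding c_plan_ahead, c_delay_target, c_fill_target
	 * and c_max_rate) from apv 95 on.  Bound the accepted size accordingly. */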
2855 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
2856 : apv == 88 ? sizeof(struct p_rs_param)
2857 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002858 : apv <= 94 ? sizeof(struct p_rs_param_89)
2859 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002860
Philipp Reisner02918be2010-08-20 14:35:10 +02002861 if (packet_size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002862 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Philipp Reisner02918be2010-08-20 14:35:10 +02002863 packet_size, exp_max_sz);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002864 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865 }
2866
2867 if (apv <= 88) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002868 header_size = sizeof(struct p_rs_param) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002869 data_size = packet_size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002870 } else if (apv <= 94) {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002871 header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002872 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002874 } else {
Philipp Reisner257d0af2011-01-26 12:15:29 +01002875 header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header);
Philipp Reisner02918be2010-08-20 14:35:10 +02002876 data_size = packet_size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002877 D_ASSERT(data_size == 0);
2878 }
2879
2880 /* initialize verify_alg and csums_alg */
2881 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2882
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002883 if (drbd_recv(mdev->tconn, &p->head.payload, header_size) != header_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002884 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002885
2886 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2887
2888 if (apv >= 88) {
2889 if (apv == 88) {
2890 if (data_size > SHARED_SECRET_MAX) {
2891 dev_err(DEV, "verify-alg too long, "
2892 "peer wants %u, accepting only %u byte\n",
2893 data_size, SHARED_SECRET_MAX);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002894 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895 }
2896
Philipp Reisnerde0ff332011-02-07 16:56:20 +01002897 if (drbd_recv(mdev->tconn, p->verify_alg, data_size) != data_size)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002898 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899
2900 /* we expect NUL terminated string */
2901 /* but just in case someone tries to be evil */
2902 D_ASSERT(p->verify_alg[data_size-1] == 0);
2903 p->verify_alg[data_size-1] = 0;
2904
2905 } else /* apv >= 89 */ {
2906 /* we still expect NUL terminated strings */
2907 /* but just in case someone tries to be evil */
2908 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
2909 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
2910 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
2911 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
2912 }
2913
2914 if (strcmp(mdev->sync_conf.verify_alg, p->verify_alg)) {
2915 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2916 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
2917 mdev->sync_conf.verify_alg, p->verify_alg);
2918 goto disconnect;
2919 }
2920 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
2921 p->verify_alg, "verify-alg");
2922 if (IS_ERR(verify_tfm)) {
2923 verify_tfm = NULL;
2924 goto disconnect;
2925 }
2926 }
2927
2928 if (apv >= 89 && strcmp(mdev->sync_conf.csums_alg, p->csums_alg)) {
2929 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
2930 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
2931 mdev->sync_conf.csums_alg, p->csums_alg);
2932 goto disconnect;
2933 }
2934 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
2935 p->csums_alg, "csums-alg");
2936 if (IS_ERR(csums_tfm)) {
2937 csums_tfm = NULL;
2938 goto disconnect;
2939 }
2940 }
2941
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002942 if (apv > 94) {
2943 mdev->sync_conf.rate = be32_to_cpu(p->rate);
2944 mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
2945 mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target);
2946 mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target);
2947 mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02002948
2949 fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ;
2950 if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) {
2951 rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL);
2952 if (!rs_plan_s) {
					dev_err(DEV, "kzalloc of fifo_buffer failed\n");
2954 goto disconnect;
2955 }
2956 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002957 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958
2959 spin_lock(&mdev->peer_seq_lock);
2960 /* lock against drbd_nl_syncer_conf() */
2961 if (verify_tfm) {
2962 strcpy(mdev->sync_conf.verify_alg, p->verify_alg);
2963 mdev->sync_conf.verify_alg_len = strlen(p->verify_alg) + 1;
2964 crypto_free_hash(mdev->verify_tfm);
2965 mdev->verify_tfm = verify_tfm;
2966 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
2967 }
2968 if (csums_tfm) {
2969 strcpy(mdev->sync_conf.csums_alg, p->csums_alg);
2970 mdev->sync_conf.csums_alg_len = strlen(p->csums_alg) + 1;
2971 crypto_free_hash(mdev->csums_tfm);
2972 mdev->csums_tfm = csums_tfm;
2973 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
2974 }
Philipp Reisner778f2712010-07-06 11:14:00 +02002975 if (fifo_size != mdev->rs_plan_s.size) {
2976 kfree(mdev->rs_plan_s.values);
2977 mdev->rs_plan_s.values = rs_plan_s;
2978 mdev->rs_plan_s.size = fifo_size;
2979 mdev->rs_planed = 0;
2980 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002981 spin_unlock(&mdev->peer_seq_lock);
2982 }
2983
2984 return ok;
2985disconnect:
2986 /* just for completeness: actually not needed,
2987 * as this is not reached if csums_tfm was ok. */
2988 crypto_free_hash(csums_tfm);
2989 /* but free the verify_tfm again, if csums_tfm did not work out */
2990 crypto_free_hash(verify_tfm);
2991 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002992 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002993}
2994
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995/* warn if the arguments differ by more than 12.5% */
2996static void warn_if_differ_considerably(struct drbd_conf *mdev,
2997 const char *s, sector_t a, sector_t b)
2998{
2999 sector_t d;
3000 if (a == 0 || b == 0)
3001 return;
3002 d = (a > b) ? (a - b) : (b - a);
3003 if (d > (a>>3) || d > (b>>3))
3004 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3005 (unsigned long long)a, (unsigned long long)b);
3006}
3007
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003008static int receive_sizes(struct drbd_conf *mdev, enum drbd_packet cmd,
3009 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003010{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003011 struct p_sizes *p = &mdev->tconn->data.rbuf.sizes;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003012 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003013 sector_t p_size, p_usize, my_usize;
3014 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003015 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003016
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017 p_size = be64_to_cpu(p->d_size);
3018 p_usize = be64_to_cpu(p->u_size);
3019
3020 if (p_size == 0 && mdev->state.disk == D_DISKLESS) {
3021 dev_err(DEV, "some backing storage is needed\n");
3022 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003023 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003024 }
3025
3026 /* just store the peer's disk size for now.
3027 * we still need to figure out whether we accept that. */
3028 mdev->p_size = p_size;
3029
Philipp Reisnerb411b362009-09-25 16:07:19 -07003030 if (get_ldev(mdev)) {
3031 warn_if_differ_considerably(mdev, "lower level device sizes",
3032 p_size, drbd_get_max_capacity(mdev->ldev));
3033 warn_if_differ_considerably(mdev, "user requested size",
3034 p_usize, mdev->ldev->dc.disk_size);
3035
3036 /* if this is the first connect, or an otherwise expected
3037 * param exchange, choose the minimum */
3038 if (mdev->state.conn == C_WF_REPORT_PARAMS)
3039 p_usize = min_not_zero((sector_t)mdev->ldev->dc.disk_size,
3040 p_usize);
3041
3042 my_usize = mdev->ldev->dc.disk_size;
3043
3044 if (mdev->ldev->dc.disk_size != p_usize) {
3045 mdev->ldev->dc.disk_size = p_usize;
3046 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3047 (unsigned long)mdev->ldev->dc.disk_size);
3048 }
3049
3050 /* Never shrink a device with usable data during connect.
3051 But allow online shrinking if we are connected. */
Philipp Reisnera393db62009-12-22 13:35:52 +01003052 if (drbd_new_dev_size(mdev, mdev->ldev, 0) <
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053 drbd_get_capacity(mdev->this_bdev) &&
3054 mdev->state.disk >= D_OUTDATED &&
3055 mdev->state.conn < C_CONNECTED) {
3056 dev_err(DEV, "The peer's disk size is too small!\n");
3057 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
3058 mdev->ldev->dc.disk_size = my_usize;
3059 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003060 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003061 }
3062 put_ldev(mdev);
3063 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003064
Philipp Reisnere89b5912010-03-24 17:11:33 +01003065 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003067 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068 put_ldev(mdev);
3069 if (dd == dev_size_error)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003070 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071 drbd_md_sync(mdev);
3072 } else {
3073 /* I am diskless, need to accept the peer's size. */
3074 drbd_set_my_capacity(mdev, p_size);
3075 }
3076
Philipp Reisner99432fc2011-05-20 16:39:13 +02003077 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3078 drbd_reconsider_max_bio_size(mdev);
3079
Philipp Reisnerb411b362009-09-25 16:07:19 -07003080 if (get_ldev(mdev)) {
3081 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3082 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3083 ldsc = 1;
3084 }
3085
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086 put_ldev(mdev);
3087 }
3088
3089 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3090 if (be64_to_cpu(p->c_size) !=
3091 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3092 /* we have different sizes, probably peer
3093 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003094 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 }
3096 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3097 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3098 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003099 mdev->state.disk >= D_INCONSISTENT) {
3100 if (ddsf & DDSF_NO_RESYNC)
3101 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3102 else
3103 resync_after_online_grow(mdev);
3104 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003105 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3106 }
3107 }
3108
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003109 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110}
3111
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003112static int receive_uuids(struct drbd_conf *mdev, enum drbd_packet cmd,
3113 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003114{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003115 struct p_uuids *p = &mdev->tconn->data.rbuf.uuids;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003116 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003117 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003118
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3120
3121 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3122 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3123
3124 kfree(mdev->p_uuid);
3125 mdev->p_uuid = p_uuid;
3126
3127 if (mdev->state.conn < C_CONNECTED &&
3128 mdev->state.disk < D_INCONSISTENT &&
3129 mdev->state.role == R_PRIMARY &&
3130 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3131 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3132 (unsigned long long)mdev->ed_uuid);
3133 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003134 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003135 }
3136
3137 if (get_ldev(mdev)) {
3138 int skip_initial_sync =
3139 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003140 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003141 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3142 (p_uuid[UI_FLAGS] & 8);
3143 if (skip_initial_sync) {
3144 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3145 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003146 "clear_n_write from receive_uuids",
3147 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3149 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3150 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3151 CS_VERBOSE, NULL);
3152 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003153 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003154 }
3155 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003156 } else if (mdev->state.disk < D_INCONSISTENT &&
3157 mdev->state.role == R_PRIMARY) {
3158 /* I am a diskless primary, the peer just created a new current UUID
3159 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003160 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 }
3162
	/* Before we test for the disk state, we should wait until any
	   ongoing cluster-wide state change has finished.  That is important if
	   we are primary and are detaching from our disk.  We need to see the
	   new disk state... */
3167 wait_event(mdev->misc_wait, !test_bit(CLUSTER_ST_CHANGE, &mdev->flags));
3168 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003169 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3170
3171 if (updated_uuids)
3172 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003174 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003175}
3176
3177/**
3178 * convert_state() - Converts the peer's view of the cluster state to our point of view
3179 * @ps: The state as seen by the peer.
3180 */
3181static union drbd_state convert_state(union drbd_state ps)
3182{
3183 union drbd_state ms;
3184
3185 static enum drbd_conns c_tab[] = {
3186 [C_CONNECTED] = C_CONNECTED,
3187
3188 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3189 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3190 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3191 [C_VERIFY_S] = C_VERIFY_T,
3192 [C_MASK] = C_MASK,
3193 };
3194
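	/* Start from the peer's packed state, then mirror the asymmetric
	 * fields: the peer's role becomes our peer, its disk our pdsk (and
	 * vice versa), and its connection state is mapped through c_tab. */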
3195 ms.i = ps.i;
3196
3197 ms.conn = c_tab[ps.conn];
3198 ms.peer = ps.role;
3199 ms.role = ps.peer;
3200 ms.pdsk = ps.disk;
3201 ms.disk = ps.pdsk;
3202 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3203
3204 return ms;
3205}
3206
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003207static int receive_req_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3208 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003210 struct p_req_state *p = &mdev->tconn->data.rbuf.req_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003211 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003212 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003213
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 mask.i = be32_to_cpu(p->mask);
3215 val.i = be32_to_cpu(p->val);
3216
Philipp Reisner25703f82011-02-07 14:35:25 +01003217 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003218 test_bit(CLUSTER_ST_CHANGE, &mdev->flags)) {
3219 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003220 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003221 }
3222
3223 mask = convert_state(mask);
3224 val = convert_state(val);
3225
3226 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3227
3228 drbd_send_sr_reply(mdev, rv);
3229 drbd_md_sync(mdev);
3230
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003231 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232}
3233
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003234static int receive_state(struct drbd_conf *mdev, enum drbd_packet cmd,
3235 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003236{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003237 struct p_state *p = &mdev->tconn->data.rbuf.state;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003238 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003240 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 int rv;
3242
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243 peer_state.i = be32_to_cpu(p->state);
3244
3245 real_peer_disk = peer_state.disk;
3246 if (peer_state.disk == D_NEGOTIATING) {
3247 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3248 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3249 }
3250
Philipp Reisner87eeee42011-01-19 14:16:30 +01003251 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003252 retry:
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003253 os = ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003254 spin_unlock_irq(&mdev->tconn->req_lock);
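	/* We only sampled mdev->state; the lock is not held across the
	 * (possibly blocking) sync handshake below.  If the state changes in
	 * the meantime, we jump back to "retry" and sample it again. */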
Philipp Reisnerb411b362009-09-25 16:07:19 -07003255
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003256 /* peer says his disk is uptodate, while we think it is inconsistent,
3257 * and this happens while we think we have a sync going on. */
3258 if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE &&
3259 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3260 /* If we are (becoming) SyncSource, but peer is still in sync
3261 * preparation, ignore its uptodate-ness to avoid flapping, it
3262 * will change to inconsistent once the peer reaches active
3263 * syncing states.
3264 * It may have changed syncer-paused flags, however, so we
3265 * cannot ignore this completely. */
3266 if (peer_state.conn > C_CONNECTED &&
3267 peer_state.conn < C_SYNC_SOURCE)
3268 real_peer_disk = D_INCONSISTENT;
3269
3270 /* if peer_state changes to connected at the same time,
3271 * it explicitly notifies us that it finished resync.
3272 * Maybe we should finish it up, too? */
3273 else if (os.conn >= C_SYNC_SOURCE &&
3274 peer_state.conn == C_CONNECTED) {
3275 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3276 drbd_resync_finished(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003277 return true;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003278 }
3279 }
3280
3281 /* peer says his disk is inconsistent, while we think it is uptodate,
3282 * and this happens while the peer still thinks we have a sync going on,
3283 * but we think we are already done with the sync.
3284 * We ignore this to avoid flapping pdsk.
3285 * This should not happen, if the peer is a recent version of drbd. */
3286 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3287 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3288 real_peer_disk = D_UP_TO_DATE;
3289
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003290 if (ns.conn == C_WF_REPORT_PARAMS)
3291 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003292
Philipp Reisner67531712010-10-27 12:21:30 +02003293 if (peer_state.conn == C_AHEAD)
3294 ns.conn = C_BEHIND;
3295
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3297 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3298 int cr; /* consider resync */
3299
3300 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003301 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 /* if we had an established connection
3303 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003304 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003305 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003306 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003307 /* if we have both been inconsistent, and the peer has been
3308 * forced to be UpToDate with --overwrite-data */
3309 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3310 /* if we had been plain connected, and the admin requested to
3311 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003312 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003313 (peer_state.conn >= C_STARTING_SYNC_S &&
3314 peer_state.conn <= C_WF_BITMAP_T));
3315
3316 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003317 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003318
3319 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003320 if (ns.conn == C_MASK) {
3321 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003323 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003324 } else if (peer_state.disk == D_NEGOTIATING) {
3325 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3326 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003327 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328 } else {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003329 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003330 return false;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003331 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003332 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003333 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003334 }
3335 }
3336 }
3337
Philipp Reisner87eeee42011-01-19 14:16:30 +01003338 spin_lock_irq(&mdev->tconn->req_lock);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003339 if (mdev->state.i != os.i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003340 goto retry;
3341 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 ns.peer = peer_state.role;
3343 ns.pdsk = real_peer_disk;
3344 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003345 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003346 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003347 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
3348 if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003349 test_bit(NEW_CUR_UUID, &mdev->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003352 spin_unlock_irq(&mdev->tconn->req_lock);
		dev_err(DEV, "Aborting Connect, cannot thaw IO with a peer that is only Consistent\n");
3354 tl_clear(mdev);
3355 drbd_uuid_new_current(mdev);
3356 clear_bit(NEW_CUR_UUID, &mdev->flags);
3357 drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003358 return false;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003359 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003360 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003361 ns = mdev->state;
Philipp Reisner87eeee42011-01-19 14:16:30 +01003362 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363
3364 if (rv < SS_SUCCESS) {
3365 drbd_force_state(mdev, NS(conn, C_DISCONNECTING));
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003366 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367 }
3368
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003369 if (os.conn > C_WF_REPORT_PARAMS) {
3370 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003371 peer_state.disk != D_NEGOTIATING ) {
3372 /* we want resync, peer has not yet decided to sync... */
3373 /* Nowadays only used when forcing a node into primary role and
3374 setting its disk to UpToDate with that */
3375 drbd_send_uuids(mdev);
3376 drbd_send_state(mdev);
3377 }
3378 }
3379
Philipp Reisner89e58e72011-01-19 13:12:45 +01003380 mdev->tconn->net_conf->want_lose = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003381
3382 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3383
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003384 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003385}
3386
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003387static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packet cmd,
3388 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003389{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003390 struct p_rs_uuid *p = &mdev->tconn->data.rbuf.rs_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003391
3392 wait_event(mdev->misc_wait,
3393 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003394 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003395 mdev->state.conn < C_CONNECTED ||
3396 mdev->state.disk < D_NEGOTIATING);
3397
3398 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3399
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400 /* Here the _drbd_uuid_ functions are right, current should
3401 _not_ be rotated into the history */
3402 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3403 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3404 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3405
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003406 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003407 drbd_start_resync(mdev, C_SYNC_TARGET);
3408
3409 put_ldev(mdev);
3410 } else
3411 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3412
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003413 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003414}
3415
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003416/**
3417 * receive_bitmap_plain
3418 *
3419 * Return 0 when done, 1 when another iteration is needed, and a negative error
3420 * code upon failure.
3421 */
3422static int
Philipp Reisner02918be2010-08-20 14:35:10 +02003423receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size,
3424 unsigned long *buffer, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425{
3426 unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
3427 unsigned want = num_words * sizeof(long);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003428 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429
Philipp Reisner02918be2010-08-20 14:35:10 +02003430 if (want != data_size) {
3431 dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003432 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003433 }
3434 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003435 return 0;
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003436 err = drbd_recv(mdev->tconn, buffer, want);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003437 if (err != want) {
3438 if (err >= 0)
3439 err = -EIO;
3440 return err;
3441 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003442
3443 drbd_bm_merge_lel(mdev, c->word_offset, num_words, buffer);
3444
3445 c->word_offset += num_words;
3446 c->bit_offset = c->word_offset * BITS_PER_LONG;
3447 if (c->bit_offset > c->bm_bits)
3448 c->bit_offset = c->bm_bits;
3449
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003450 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003451}
3452
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003453/**
3454 * recv_bm_rle_bits
3455 *
3456 * Return 0 when done, 1 when another iteration is needed, and a negative error
3457 * code upon failure.
3458 */
3459static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460recv_bm_rle_bits(struct drbd_conf *mdev,
3461 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003462 struct bm_xfer_ctx *c,
3463 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003464{
3465 struct bitstream bs;
3466 u64 look_ahead;
3467 u64 rl;
3468 u64 tmp;
3469 unsigned long s = c->bit_offset;
3470 unsigned long e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003471 int toggle = DCBP_get_start(p);
3472 int have;
3473 int bits;
3474
3475 bitstream_init(&bs, p->code, len, DCBP_get_pad_bits(p));
3476
3477 bits = bitstream_get_bits(&bs, &look_ahead, 64);
3478 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003479 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480
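	/* The payload is a sequence of variable-length-encoded run lengths.
	 * Runs alternate between clear and set bits ("toggle"); only the set
	 * runs are written into the bitmap.  look_ahead/have act as a small
	 * refill buffer over the incoming bit stream. */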
3481 for (have = bits; have > 0; s += rl, toggle = !toggle) {
3482 bits = vli_decode_bits(&rl, look_ahead);
3483 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003484 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485
3486 if (toggle) {
3487 e = s + rl -1;
3488 if (e >= c->bm_bits) {
3489 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003490 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003491 }
3492 _drbd_bm_set_bits(mdev, s, e);
3493 }
3494
3495 if (have < bits) {
3496 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
3497 have, bits, look_ahead,
3498 (unsigned int)(bs.cur.b - p->code),
3499 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003500 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003501 }
3502 look_ahead >>= bits;
3503 have -= bits;
3504
3505 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
3506 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003507 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 look_ahead |= tmp << have;
3509 have += bits;
3510 }
3511
3512 c->bit_offset = s;
3513 bm_xfer_ctx_bit_to_word_offset(c);
3514
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003515 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516}
3517
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003518/**
3519 * decode_bitmap_c
3520 *
3521 * Return 0 when done, 1 when another iteration is needed, and a negative error
3522 * code upon failure.
3523 */
3524static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525decode_bitmap_c(struct drbd_conf *mdev,
3526 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003527 struct bm_xfer_ctx *c,
3528 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529{
3530 if (DCBP_get_code(p) == RLE_VLI_Bits)
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003531 return recv_bm_rle_bits(mdev, p, c, len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532
3533 /* other variants had been implemented for evaluation,
3534 * but have been dropped as this one turned out to be "best"
3535 * during all our tests. */
3536
3537 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
3538 drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003539 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003540}
3541
3542void INFO_bm_xfer_stats(struct drbd_conf *mdev,
3543 const char *direction, struct bm_xfer_ctx *c)
3544{
3545 /* what would it take to transfer it "plaintext" */
Philipp Reisnerc0129492011-01-19 16:58:16 +01003546 unsigned plain = sizeof(struct p_header) *
Philipp Reisnerb411b362009-09-25 16:07:19 -07003547 ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1)
3548 + c->bm_words * sizeof(long);
3549 unsigned total = c->bytes[0] + c->bytes[1];
3550 unsigned r;
3551
3552 /* total can not be zero. but just in case: */
3553 if (total == 0)
3554 return;
3555
3556 /* don't report if not compressed */
3557 if (total >= plain)
3558 return;
3559
3560 /* total < plain. check for overflow, still */
3561 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
3562 : (1000 * total / plain);
3563
3564 if (r > 1000)
3565 r = 1000;
3566
3567 r = 1000 - r;
3568 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
3569 "total %u; compression: %u.%u%%\n",
3570 direction,
3571 c->bytes[1], c->packets[1],
3572 c->bytes[0], c->packets[0],
3573 total, r/10, r % 10);
3574}
3575
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian.  (Understand it as a byte stream,
   beginning with the lowest byte...)  If we used big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003584static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packet cmd,
3585 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586{
3587 struct bm_xfer_ctx c;
3588 void *buffer;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003589 int err;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003590 int ok = false;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003591 struct p_header *h = &mdev->tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003592 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003594 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3595 /* you are supposed to send additional out-of-sync information
3596 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597
3598 /* maybe we should use some per thread scratch page,
3599 * and allocate that during initial device creation? */
3600 buffer = (unsigned long *) __get_free_page(GFP_NOIO);
3601 if (!buffer) {
3602 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
3603 goto out;
3604 }
3605
3606 c = (struct bm_xfer_ctx) {
3607 .bm_bits = drbd_bm_bits(mdev),
3608 .bm_words = drbd_bm_words(mdev),
3609 };
3610
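	/* Keep receiving and merging bitmap packets, plain or RLE compressed,
	 * until the transfer context reports completion or an error occurs. */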
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003611 for(;;) {
Philipp Reisner02918be2010-08-20 14:35:10 +02003612 if (cmd == P_BITMAP) {
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003613 err = receive_bitmap_plain(mdev, data_size, buffer, &c);
Philipp Reisner02918be2010-08-20 14:35:10 +02003614 } else if (cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003615 /* MAYBE: sanity check that we speak proto >= 90,
3616 * and the feature is enabled! */
3617 struct p_compressed_bm *p;
3618
Philipp Reisner02918be2010-08-20 14:35:10 +02003619 if (data_size > BM_PACKET_PAYLOAD_BYTES) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620 dev_err(DEV, "ReportCBitmap packet too large\n");
3621 goto out;
3622 }
3623 /* use the page buff */
3624 p = buffer;
3625 memcpy(p, h, sizeof(*h));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003626 if (drbd_recv(mdev->tconn, p->head.payload, data_size) != data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627 goto out;
Lars Ellenberg004352f2010-10-05 20:13:58 +02003628 if (data_size <= (sizeof(*p) - sizeof(p->head))) {
3629 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size);
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01003630 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003631 }
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01003632 err = decode_bitmap_c(mdev, p, &c, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633 } else {
Philipp Reisner02918be2010-08-20 14:35:10 +02003634 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003635 goto out;
3636 }
3637
Philipp Reisner02918be2010-08-20 14:35:10 +02003638 c.packets[cmd == P_BITMAP]++;
Philipp Reisner257d0af2011-01-26 12:15:29 +01003639 c.bytes[cmd == P_BITMAP] += sizeof(struct p_header) + data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003640
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003641 if (err <= 0) {
3642 if (err < 0)
3643 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003645 }
Philipp Reisner9ba7aa02011-02-07 17:32:41 +01003646 if (!drbd_recv_header(mdev->tconn, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647 goto out;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003648 cmd = pi.cmd;
3649 data_size = pi.size;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003650 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651
3652 INFO_bm_xfer_stats(mdev, "receive", &c);
3653
3654 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003655 enum drbd_state_rv rv;
3656
Philipp Reisnerb411b362009-09-25 16:07:19 -07003657 ok = !drbd_send_bitmap(mdev);
3658 if (!ok)
3659 goto out;
3660 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01003661 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
3662 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663 } else if (mdev->state.conn != C_WF_BITMAP_S) {
3664 /* admin may have requested C_DISCONNECTING,
3665 * other threads may have noticed network errors */
3666 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
3667 drbd_conn_str(mdev->state.conn));
3668 }
3669
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003670 ok = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003671 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003672 drbd_bm_unlock(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3674 drbd_start_resync(mdev, C_SYNC_SOURCE);
3675 free_page((unsigned long) buffer);
3676 return ok;
3677}
3678
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003679static int receive_skip(struct drbd_conf *mdev, enum drbd_packet cmd,
3680 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681{
3682 /* TODO zero copy sink :) */
3683 static char sink[128];
3684 int size, want, r;
3685
Philipp Reisner02918be2010-08-20 14:35:10 +02003686 dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n",
3687 cmd, data_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003688
Philipp Reisner02918be2010-08-20 14:35:10 +02003689 size = data_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690 while (size > 0) {
3691 want = min_t(int, size, sizeof(sink));
Philipp Reisnerde0ff332011-02-07 16:56:20 +01003692 r = drbd_recv(mdev->tconn, sink, want);
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01003693 if (!expect(r > 0))
3694 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003695 size -= r;
3696 }
3697 return size == 0;
3698}
3699
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003700static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packet cmd,
3701 unsigned int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703 /* Make sure we've acked all the TCP data associated
3704 * with the data requests being unplugged */
Philipp Reisnere42325a2011-01-19 13:55:45 +01003705 drbd_tcp_quickack(mdev->tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003707 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003708}
3709
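/* The peer reports a block as out of sync; mark it in our bitmap.  This is
 * only expected while in WFSyncUUID, WFBitMapT or Behind, see the assert
 * below. */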
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003710static int receive_out_of_sync(struct drbd_conf *mdev, enum drbd_packet cmd,
3711 unsigned int data_size)
Philipp Reisner73a01a12010-10-27 14:33:00 +02003712{
Philipp Reisnere42325a2011-01-19 13:55:45 +01003713 struct p_block_desc *p = &mdev->tconn->data.rbuf.block_desc;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003714
Lars Ellenbergf735e3632010-12-17 21:06:18 +01003715 switch (mdev->state.conn) {
3716 case C_WF_SYNC_UUID:
3717 case C_WF_BITMAP_T:
3718 case C_BEHIND:
3719 break;
3720 default:
3721 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
3722 drbd_conn_str(mdev->state.conn));
3723 }
3724
Philipp Reisner73a01a12010-10-27 14:33:00 +02003725 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
3726
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003727 return true;
Philipp Reisner73a01a12010-10-27 14:33:00 +02003728}
3729
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01003730typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packet cmd,
3731 unsigned int to_receive);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732
Philipp Reisner02918be2010-08-20 14:35:10 +02003733struct data_cmd {
3734 int expect_payload;
3735 size_t pkt_size;
3736 drbd_cmd_handler_f function;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003737};
3738
Philipp Reisner02918be2010-08-20 14:35:10 +02003739static struct data_cmd drbd_cmd_handler[] = {
3740 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
3741 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
3742 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
3743 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Philipp Reisner257d0af2011-01-26 12:15:29 +01003744 [P_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3745 [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header), receive_bitmap } ,
3746 [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header), receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02003747 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3748 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner257d0af2011-01-26 12:15:29 +01003749 [P_SYNC_PARAM] = { 1, sizeof(struct p_header), receive_SyncParam },
3750 [P_SYNC_PARAM89] = { 1, sizeof(struct p_header), receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02003751 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
3752 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
3753 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
3754 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
3755 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
3756 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
3757 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
3758 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3759 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
3760 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02003761 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Philipp Reisner02918be2010-08-20 14:35:10 +02003762 /* anything missing from this table is in
3763 * the asender_tbl, see get_asender_cmd */
3764 [P_MAX_CMD] = { 0, 0, NULL },
3765};
3766
 3767/* All handler functions that expect a sub-header get that sub-header in
Philipp Reisnere42325a2011-01-19 13:55:45 +01003768 mdev->tconn->data.rbuf.header.head.payload.
Philipp Reisner02918be2010-08-20 14:35:10 +02003769
Philipp Reisnere42325a2011-01-19 13:55:45 +01003770 Usually the callback can find the usual p_header in mdev->tconn->data.rbuf.header.head,
Philipp Reisner02918be2010-08-20 14:35:10 +02003771 but it must not rely on that, since there is also p_header95.
3772 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773
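/* Main loop of the receiver thread: read a packet header, look up the
 * handler in drbd_cmd_handler[], read the sub-header if the packet has one,
 * and dispatch to the handler.  Any failure forces the connection into
 * C_PROTOCOL_ERROR. */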
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003774static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003775{
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003776 struct p_header *header = &tconn->data.rbuf.header;
Philipp Reisner77351055b2011-02-07 17:24:26 +01003777 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02003778 size_t shs; /* sub header size */
3779 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003781 while (get_t_state(&tconn->receiver) == RUNNING) {
3782 drbd_thread_current_set_cpu(&tconn->receiver);
3783 if (!drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02003784 goto err_out;
3785
Philipp Reisner77351055b2011-02-07 17:24:26 +01003786 if (unlikely(pi.cmd >= P_MAX_CMD || !drbd_cmd_handler[pi.cmd].function)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003787 conn_err(tconn, "unknown packet type %d, l: %d!\n", pi.cmd, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003788 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01003789 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003790
Philipp Reisner77351055b2011-02-07 17:24:26 +01003791 shs = drbd_cmd_handler[pi.cmd].pkt_size - sizeof(struct p_header);
3792 if (pi.size - shs > 0 && !drbd_cmd_handler[pi.cmd].expect_payload) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003793 conn_err(tconn, "No payload expected %s l:%d\n", cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003794 goto err_out;
3795 }
3796
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003797 if (shs) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003798 rv = drbd_recv(tconn, &header->payload, shs);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003799 if (unlikely(rv != shs)) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01003800 if (!signal_pending(current))
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003801 conn_warn(tconn, "short read while reading sub header: rv=%d\n", rv);
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02003802 goto err_out;
3803 }
3804 }
3805
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003806 rv = drbd_cmd_handler[pi.cmd].function(vnr_to_mdev(tconn, pi.vnr), pi.cmd, pi.size - shs);
Philipp Reisner02918be2010-08-20 14:35:10 +02003807
3808 if (unlikely(!rv)) {
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003809 conn_err(tconn, "error receiving %s, l: %d!\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01003810 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02003811 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003812 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003813 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814
Philipp Reisner02918be2010-08-20 14:35:10 +02003815 if (0) {
3816 err_out:
Philipp Reisnereefc2f72011-02-08 12:55:24 +01003817 drbd_force_state(tconn->volume0, NS(conn, C_PROTOCOL_ERROR));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003819}
3820
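/* Queue a barrier work item and wait for it to complete, i.e. wait until
 * everything queued on the worker before this call has been processed. */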
Philipp Reisnera21e9292011-02-08 15:08:49 +01003821void drbd_flush_workqueue(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822{
3823 struct drbd_wq_barrier barr;
3824
3825 barr.w.cb = w_prev_work_done;
Philipp Reisnera21e9292011-02-08 15:08:49 +01003826 barr.w.mdev = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827 init_completion(&barr.done);
Philipp Reisnera21e9292011-02-08 15:08:49 +01003828 drbd_queue_work(&mdev->tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829 wait_for_completion(&barr.done);
3830}
3831
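/* Tear down an established connection: stop the asender, close the sockets,
 * run the per-volume cleanup via drbd_disconnected(), and move the
 * connection to C_UNCONNECTED, or on to C_STANDALONE if the admin asked for
 * C_DISCONNECTING. */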
Philipp Reisner360cc742011-02-08 14:29:53 +01003832static void drbd_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833{
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834 union drbd_state os, ns;
3835 int rv = SS_UNKNOWN_ERROR;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836
Philipp Reisner360cc742011-02-08 14:29:53 +01003837 if (tconn->volume0->state.conn == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003838 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003839
3840 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01003841 drbd_thread_stop(&tconn->asender);
3842 drbd_free_sock(tconn);
3843
3844 idr_for_each(&tconn->volumes, drbd_disconnected, tconn);
3845
3846 conn_info(tconn, "Connection closed\n");
3847
3848 spin_lock_irq(&tconn->req_lock);
3849 os = tconn->volume0->state;
3850 if (os.conn >= C_UNCONNECTED) {
3851 /* Do not restart in case we are C_DISCONNECTING */
3852 ns.i = os.i;
3853 ns.conn = C_UNCONNECTED;
3854 rv = _drbd_set_state(tconn->volume0, ns, CS_VERBOSE, NULL);
3855 }
3856 spin_unlock_irq(&tconn->req_lock);
3857
3858 if (os.conn == C_DISCONNECTING) {
3859 wait_event(tconn->net_cnt_wait, atomic_read(&tconn->net_cnt) == 0);
3860
3861 crypto_free_hash(tconn->cram_hmac_tfm);
3862 tconn->cram_hmac_tfm = NULL;
3863
3864 kfree(tconn->net_conf);
3865 tconn->net_conf = NULL;
3866 drbd_request_state(tconn->volume0, NS(conn, C_STANDALONE));
3867 }
3868}
3869
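/* Per-volume cleanup after the connection went down: wait for pending epoch
 * entries, cancel resync, flush the worker queue, clear the transfer log
 * unless I/O is suspended, and release the pages still referenced by
 * net_ee.  Called for each volume via idr_for_each(). */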
3870static int drbd_disconnected(int vnr, void *p, void *data)
3871{
3872 struct drbd_conf *mdev = (struct drbd_conf *)p;
3873 enum drbd_fencing_p fp;
3874 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003875
Philipp Reisner85719572010-07-21 10:20:17 +02003876 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003877 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003878 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
3879 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
3880 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003881 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003882
3883 /* We do not have data structures that would allow us to
3884 * get the rs_pending_cnt down to 0 again.
3885 * * On C_SYNC_TARGET we do not have any data structures describing
3886 * the pending RSDataRequest's we have sent.
3887 * * On C_SYNC_SOURCE there is no data structure that tracks
3888 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
3889 * And no, it is not the sum of the reference counts in the
3890 * resync_LRU. The resync_LRU tracks the whole operation including
3891 * the disk-IO, while the rs_pending_cnt only tracks the blocks
3892 * on the fly. */
3893 drbd_rs_cancel_all(mdev);
3894 mdev->rs_total = 0;
3895 mdev->rs_failed = 0;
3896 atomic_set(&mdev->rs_pending_cnt, 0);
3897 wake_up(&mdev->misc_wait);
3898
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003899 del_timer(&mdev->request_timer);
3900
Philipp Reisnerb411b362009-09-25 16:07:19 -07003901 /* make sure syncer is stopped and w_resume_next_sg queued */
3902 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003903 resync_timer_fn((unsigned long)mdev);
3904
Philipp Reisnerb411b362009-09-25 16:07:19 -07003905 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
3906 * w_make_resync_request etc. which may still be on the worker queue
3907 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01003908 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909
3910 /* This also does reclaim_net_ee(). If we do this too early, we might
3911 * miss some resync ee and pages.*/
3912 drbd_process_done_ee(mdev);
3913
3914 kfree(mdev->p_uuid);
3915 mdev->p_uuid = NULL;
3916
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003917 if (!is_susp(mdev->state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003918 tl_clear(mdev);
3919
Philipp Reisnerb411b362009-09-25 16:07:19 -07003920 drbd_md_sync(mdev);
3921
3922 fp = FP_DONT_CARE;
3923 if (get_ldev(mdev)) {
3924 fp = mdev->ldev->dc.fencing;
3925 put_ldev(mdev);
3926 }
3927
Philipp Reisner87f7be42010-06-11 13:56:33 +02003928 if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN)
3929 drbd_try_outdate_peer_async(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003930
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003931 /* serialize with bitmap writeout triggered by the state change,
3932 * if any. */
3933 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3934
Philipp Reisnerb411b362009-09-25 16:07:19 -07003935 /* tcp_close and release of sendpage pages can be deferred. I don't
3936 * want to use SO_LINGER, because apparently it can be deferred for
3937 * more than 20 seconds (longest time I checked).
3938 *
3939 * Actually we don't care for exactly when the network stack does its
3940 * put_page(), but release our reference on these pages right here.
3941 */
3942 i = drbd_release_ee(mdev, &mdev->net_ee);
3943 if (i)
3944 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003945 i = atomic_read(&mdev->pp_in_use_by_net);
3946 if (i)
3947 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003948 i = atomic_read(&mdev->pp_in_use);
3949 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02003950 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951
3952 D_ASSERT(list_empty(&mdev->read_ee));
3953 D_ASSERT(list_empty(&mdev->active_ee));
3954 D_ASSERT(list_empty(&mdev->sync_ee));
3955 D_ASSERT(list_empty(&mdev->done_ee));
3956
3957 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
3958 atomic_set(&mdev->current_epoch->epoch_size, 0);
3959 D_ASSERT(list_empty(&mdev->current_epoch->list));
Philipp Reisner360cc742011-02-08 14:29:53 +01003960
3961 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003962}
3963
3964/*
3965 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
3966 * we can agree on is stored in agreed_pro_version.
3967 *
3968 * feature flags and the reserved array should be enough room for future
3969 * enhancements of the handshake protocol, and possible plugins...
3970 *
3971 * for now, they are expected to be zero, but ignored.
3972 */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003973static int drbd_send_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003974{
Philipp Reisnere6b3ea82011-01-19 14:02:01 +01003975 /* ASSERT current == mdev->tconn->receiver ... */
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003976 struct p_handshake *p = &tconn->data.sbuf.handshake;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977 int ok;
3978
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003979 if (mutex_lock_interruptible(&tconn->data.mutex)) {
3980 conn_err(tconn, "interrupted during initial handshake\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981 return 0; /* interrupted. not ok. */
3982 }
3983
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003984 if (tconn->data.socket == NULL) {
3985 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003986 return 0;
3987 }
3988
3989 memset(p, 0, sizeof(*p));
3990 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
3991 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Philipp Reisner8a22ccc2011-02-07 16:47:12 +01003992 ok = _conn_send_cmd(tconn, 0, tconn->data.socket, P_HAND_SHAKE,
3993 &p->head, sizeof(*p), 0);
3994 mutex_unlock(&tconn->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995 return ok;
3996}
3997
3998/*
3999 * return values:
4000 * 1 yes, we have a valid connection
4001 * 0 oops, did not work out, please try again
4002 * -1 peer talks different language,
4003 * no point in trying again, please go standalone.
4004 */
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004005static int drbd_do_handshake(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004006{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004007 /* ASSERT current == tconn->receiver ... */
4008 struct p_handshake *p = &tconn->data.rbuf.handshake;
Philipp Reisner02918be2010-08-20 14:35:10 +02004009 const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004010 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004011 int rv;
4012
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004013 rv = drbd_send_handshake(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004014 if (!rv)
4015 return 0;
4016
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004017 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004018 if (!rv)
4019 return 0;
4020
Philipp Reisner77351055b2011-02-07 17:24:26 +01004021 if (pi.cmd != P_HAND_SHAKE) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004022 conn_err(tconn, "expected HandShake packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004023 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 return -1;
4025 }
4026
Philipp Reisner77351055b2011-02-07 17:24:26 +01004027 if (pi.size != expect) {
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004028 conn_err(tconn, "expected HandShake length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004029 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 return -1;
4031 }
4032
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004033 rv = drbd_recv(tconn, &p->head.payload, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034
4035 if (rv != expect) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004036 if (!signal_pending(current))
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004037 conn_warn(tconn, "short read receiving handshake packet: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004038 return 0;
4039 }
4040
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041 p->protocol_min = be32_to_cpu(p->protocol_min);
4042 p->protocol_max = be32_to_cpu(p->protocol_max);
4043 if (p->protocol_max == 0)
4044 p->protocol_max = p->protocol_min;
4045
4046 if (PRO_VERSION_MAX < p->protocol_min ||
4047 PRO_VERSION_MIN > p->protocol_max)
4048 goto incompat;
4049
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004050 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004051
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004052 conn_info(tconn, "Handshake successful: "
4053 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054
4055 return 1;
4056
4057 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004058 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004059 "I support %d-%d, peer supports %d-%d\n",
4060 PRO_VERSION_MIN, PRO_VERSION_MAX,
4061 p->protocol_min, p->protocol_max);
4062 return -1;
4063}
4064
4065#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004066static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004067{
 4068	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
 4069	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004070 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004071}
4072#else
4073#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004074
4075/* Return value:
4076 1 - auth succeeded,
4077 0 - failed, try again (network error),
4078 -1 - auth failed, don't try again.
4079*/
4080
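/* Challenge-response authentication on the data socket:
 *   1. send P_AUTH_CHALLENGE with CHALLENGE_LEN random bytes,
 *   2. receive the peer's challenge and HMAC it with the shared secret,
 *   3. send that digest back as P_AUTH_RESPONSE,
 *   4. receive the peer's response and compare it with the HMAC of our own
 *      challenge.
 * Uses the cram_hmac_tfm already allocated for this connection. */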
Philipp Reisner13e60372011-02-08 09:54:40 +01004081static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004082{
4083 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4084 struct scatterlist sg;
4085 char *response = NULL;
4086 char *right_response = NULL;
4087 char *peers_ch = NULL;
Philipp Reisner13e60372011-02-08 09:54:40 +01004088 unsigned int key_len = strlen(tconn->net_conf->shared_secret);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089 unsigned int resp_size;
4090 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004091 struct packet_info pi;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 int rv;
4093
Philipp Reisner13e60372011-02-08 09:54:40 +01004094 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004095 desc.flags = 0;
4096
Philipp Reisner13e60372011-02-08 09:54:40 +01004097 rv = crypto_hash_setkey(tconn->cram_hmac_tfm,
4098 (u8 *)tconn->net_conf->shared_secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004099 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004100 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004101 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004102 goto fail;
4103 }
4104
4105 get_random_bytes(my_challenge, CHALLENGE_LEN);
4106
Philipp Reisner13e60372011-02-08 09:54:40 +01004107 rv = conn_send_cmd2(tconn, P_AUTH_CHALLENGE, my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108 if (!rv)
4109 goto fail;
4110
Philipp Reisner13e60372011-02-08 09:54:40 +01004111 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112 if (!rv)
4113 goto fail;
4114
Philipp Reisner77351055b2011-02-07 17:24:26 +01004115 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004116 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004117 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118 rv = 0;
4119 goto fail;
4120 }
4121
Philipp Reisner77351055b2011-02-07 17:24:26 +01004122 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004123 		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004124 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004125 goto fail;
4126 }
4127
Philipp Reisner77351055b2011-02-07 17:24:26 +01004128 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004130 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004131 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004132 goto fail;
4133 }
4134
Philipp Reisner13e60372011-02-08 09:54:40 +01004135 rv = drbd_recv(tconn, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004136
Philipp Reisner77351055b2011-02-07 17:24:26 +01004137 if (rv != pi.size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004138 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004139 conn_warn(tconn, "short read AuthChallenge: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004140 rv = 0;
4141 goto fail;
4142 }
4143
Philipp Reisner13e60372011-02-08 09:54:40 +01004144 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004145 response = kmalloc(resp_size, GFP_NOIO);
4146 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004147 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004148 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004149 goto fail;
4150 }
4151
4152 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004153 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004154
4155 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4156 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004157 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004158 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004159 goto fail;
4160 }
4161
Philipp Reisner13e60372011-02-08 09:54:40 +01004162 rv = conn_send_cmd2(tconn, P_AUTH_RESPONSE, response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004163 if (!rv)
4164 goto fail;
4165
Philipp Reisner13e60372011-02-08 09:54:40 +01004166 rv = drbd_recv_header(tconn, &pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167 if (!rv)
4168 goto fail;
4169
Philipp Reisner77351055b2011-02-07 17:24:26 +01004170 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004171 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004172 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173 rv = 0;
4174 goto fail;
4175 }
4176
Philipp Reisner77351055b2011-02-07 17:24:26 +01004177 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004178 		conn_err(tconn, "AuthResponse payload has the wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004179 rv = 0;
4180 goto fail;
4181 }
4182
Philipp Reisner13e60372011-02-08 09:54:40 +01004183 rv = drbd_recv(tconn, response , resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004184
4185 if (rv != resp_size) {
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01004186 if (!signal_pending(current))
Philipp Reisner13e60372011-02-08 09:54:40 +01004187 conn_warn(tconn, "short read receiving AuthResponse: l=%u\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188 rv = 0;
4189 goto fail;
4190 }
4191
4192 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004193 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004194 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004195 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004196 goto fail;
4197 }
4198
4199 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4200
4201 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4202 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004203 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004204 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004205 goto fail;
4206 }
4207
4208 rv = !memcmp(response, right_response, resp_size);
4209
4210 if (rv)
Philipp Reisner13e60372011-02-08 09:54:40 +01004211 conn_info(tconn, "Peer authenticated using %d bytes of '%s' HMAC\n",
4212 resp_size, tconn->net_conf->cram_hmac_alg);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004213 else
4214 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004215
4216 fail:
4217 kfree(peers_ch);
4218 kfree(response);
4219 kfree(right_response);
4220
4221 return rv;
4222}
4223#endif
4224
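/* Receiver thread entry point: establish the connection, run drbdd() until
 * it returns, clean up with drbd_disconnect(), and retry unless the network
 * configuration was discarded because the peer is incompatible. */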
4225int drbdd_init(struct drbd_thread *thi)
4226{
Philipp Reisner392c8802011-02-09 10:33:31 +01004227 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228 int h;
4229
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004230 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231
4232 do {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004233 h = drbd_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234 if (h == 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004235 drbd_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004236 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237 }
4238 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004239 conn_warn(tconn, "Discarding network configuration.\n");
4240 drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241 }
4242 } while (h == 0);
4243
4244 if (h > 0) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004245 if (get_net_conf(tconn)) {
4246 drbdd(tconn);
4247 put_net_conf(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248 }
4249 }
4250
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004251 drbd_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004253 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 return 0;
4255}
4256
4257/* ********* acknowledge sender ******** */
4258
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004259static int got_RqSReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004261 struct p_req_state_reply *p = &mdev->tconn->meta.rbuf.req_state_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262
4263 int retcode = be32_to_cpu(p->retcode);
4264
4265 if (retcode >= SS_SUCCESS) {
4266 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4267 } else {
4268 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4269 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4270 drbd_set_st_err_str(retcode), retcode);
4271 }
4272 wake_up(&mdev->state_wait);
4273
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004274 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275}
4276
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004277static int got_Ping(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278{
4279 return drbd_send_ping_ack(mdev);
4280
4281}
4282
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004283static int got_PingAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004284{
4285 /* restore idle timeout */
Philipp Reisnere42325a2011-01-19 13:55:45 +01004286 mdev->tconn->meta.socket->sk->sk_rcvtimeo = mdev->tconn->net_conf->ping_int*HZ;
Philipp Reisner309d1602010-03-02 15:03:44 +01004287 if (!test_and_set_bit(GOT_PING_ACK, &mdev->flags))
4288 wake_up(&mdev->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004289
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004290 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004291}
4292
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004293static int got_IsInSync(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004294{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004295 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296 sector_t sector = be64_to_cpu(p->sector);
4297 int blksize = be32_to_cpu(p->blksize);
4298
Philipp Reisner31890f42011-01-19 14:12:51 +01004299 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004300
4301 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4302
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004303 if (get_ldev(mdev)) {
4304 drbd_rs_complete_io(mdev, sector);
4305 drbd_set_in_sync(mdev, sector, blksize);
4306 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4307 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4308 put_ldev(mdev);
4309 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004311 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004312
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004313 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314}
4315
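/* Find the request an ACK refers to in the given tree and apply the request
 * state transition 'what' to it; complete the master bio if that finishes
 * the request. */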
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004316static int
4317validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4318 struct rb_root *root, const char *func,
4319 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320{
4321 struct drbd_request *req;
4322 struct bio_and_error m;
4323
Philipp Reisner87eeee42011-01-19 14:16:30 +01004324 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004325 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004326 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004327 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004328 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329 }
4330 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004331 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332
4333 if (m.bio)
4334 complete_master_bio(mdev, &m);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004335 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004336}
4337
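/* Write ACKs from the peer: ID_SYNCER means the block was resync data, so
 * just mark it in sync; otherwise translate the ACK type into the matching
 * request event and feed it into the request state machine. */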
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004338static int got_BlockAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004339{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004340 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341 sector_t sector = be64_to_cpu(p->sector);
4342 int blksize = be32_to_cpu(p->blksize);
4343 enum drbd_req_event what;
4344
4345 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4346
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004347 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 drbd_set_in_sync(mdev, sector, blksize);
4349 dec_rs_pending(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004350 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 }
Philipp Reisner257d0af2011-01-26 12:15:29 +01004352 switch (cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004353 case P_RS_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004354 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004355 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356 break;
4357 case P_WRITE_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004358 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004359 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 break;
4361 case P_RECV_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004362 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004363 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 break;
4365 case P_DISCARD_ACK:
Philipp Reisner89e58e72011-01-19 13:12:45 +01004366 D_ASSERT(mdev->tconn->net_conf->wire_protocol == DRBD_PROT_C);
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004367 what = CONFLICT_DISCARDED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004368 break;
4369 default:
4370 D_ASSERT(0);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004371 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 }
4373
4374 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004375 &mdev->write_requests, __func__,
4376 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004377}
4378
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004379static int got_NegAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004381 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004383 int size = be32_to_cpu(p->blksize);
Philipp Reisner89e58e72011-01-19 13:12:45 +01004384 bool missing_ok = mdev->tconn->net_conf->wire_protocol == DRBD_PROT_A ||
4385 mdev->tconn->net_conf->wire_protocol == DRBD_PROT_B;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004386 bool found;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387
4388 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4389
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004390 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391 dec_rs_pending(mdev);
4392 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004393 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004394 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004395
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004396 found = validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004397 &mdev->write_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004398 NEG_ACKED, missing_ok);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004399 if (!found) {
4400 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4401 The master bio might already be completed, therefore the
4402 request is no longer in the collision hash. */
4403 /* In Protocol B we might already have got a P_RECV_ACK
4404 but then get a P_NEG_ACK afterwards. */
4405 if (!missing_ok)
Philipp Reisner2deb8332011-01-17 18:39:18 +01004406 return false;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004407 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004408 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004409 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004410}
4411
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004412static int got_NegDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004413{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004414 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415 sector_t sector = be64_to_cpu(p->sector);
4416
4417 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4418 dev_err(DEV, "Got NegDReply; Sector %llus, len %u; Fail original request.\n",
4419 (unsigned long long)sector, be32_to_cpu(p->blksize));
4420
4421 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004422 &mdev->read_requests, __func__,
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004423 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004424}
4425
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004426static int got_NegRSDReply(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427{
4428 sector_t sector;
4429 int size;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004430 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004431
4432 sector = be64_to_cpu(p->sector);
4433 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004434
4435 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4436
4437 dec_rs_pending(mdev);
4438
4439 if (get_ldev_if_state(mdev, D_FAILED)) {
4440 drbd_rs_complete_io(mdev, sector);
Philipp Reisner257d0af2011-01-26 12:15:29 +01004441 switch (cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01004442 case P_NEG_RS_DREPLY:
4443 drbd_rs_failed_io(mdev, sector, size);
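			/* fall through */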
4444 case P_RS_CANCEL:
4445 break;
4446 default:
4447 D_ASSERT(0);
4448 put_ldev(mdev);
4449 return false;
4450 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004451 put_ldev(mdev);
4452 }
4453
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004454 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455}
4456
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004457static int got_BarrierAck(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004458{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004459 struct p_barrier_ack *p = &mdev->tconn->meta.rbuf.barrier_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460
4461 tl_release(mdev, p->barrier, be32_to_cpu(p->set_size));
4462
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004463 if (mdev->state.conn == C_AHEAD &&
4464 atomic_read(&mdev->ap_in_flight) == 0 &&
Philipp Reisner370a43e2011-01-14 16:03:11 +01004465 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->current_epoch->flags)) {
4466 mdev->start_resync_timer.expires = jiffies + HZ;
4467 add_timer(&mdev->start_resync_timer);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004468 }
4469
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004470 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004471}
4472
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004473static int got_OVResult(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004474{
Philipp Reisner257d0af2011-01-26 12:15:29 +01004475 struct p_block_ack *p = &mdev->tconn->meta.rbuf.block_ack;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004476 struct drbd_work *w;
4477 sector_t sector;
4478 int size;
4479
4480 sector = be64_to_cpu(p->sector);
4481 size = be32_to_cpu(p->blksize);
4482
4483 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4484
4485 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
4486 drbd_ov_oos_found(mdev, sector, size);
4487 else
4488 ov_oos_print(mdev);
4489
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004490 if (!get_ldev(mdev))
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004491 return true;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004492
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493 drbd_rs_complete_io(mdev, sector);
4494 dec_rs_pending(mdev);
4495
Lars Ellenbergea5442a2010-11-05 09:48:01 +01004496 --mdev->ov_left;
4497
4498 /* let's advance progress step marks only for every other megabyte */
4499 if ((mdev->ov_left & 0x200) == 0x200)
4500 drbd_advance_rs_marks(mdev, mdev->ov_left);
4501
4502 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004503 w = kmalloc(sizeof(*w), GFP_NOIO);
4504 if (w) {
4505 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01004506 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01004507 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004508 } else {
4509 dev_err(DEV, "kmalloc(w) failed.");
4510 ov_oos_print(mdev);
4511 drbd_resync_finished(mdev);
4512 }
4513 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004514 put_ldev(mdev);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004515 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516}
4517
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004518static int got_skip(struct drbd_conf *mdev, enum drbd_packet cmd)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004519{
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01004520 return true;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02004521}
4522
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523struct asender_cmd {
4524 size_t pkt_size;
Andreas Gruenbacherd8763022011-01-26 17:39:41 +01004525 int (*process)(struct drbd_conf *mdev, enum drbd_packet cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526};
4527
4528static struct asender_cmd *get_asender_cmd(int cmd)
4529{
4530 static struct asender_cmd asender_tbl[] = {
4531 /* anything missing from this table is in
4532 * the drbd_cmd_handler (drbd_default_handler) table,
4533 * see the beginning of drbdd() */
Philipp Reisner257d0af2011-01-26 12:15:29 +01004534 [P_PING] = { sizeof(struct p_header), got_Ping },
4535 [P_PING_ACK] = { sizeof(struct p_header), got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07004536 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4537 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4538 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4539 [P_DISCARD_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
4540 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
4541 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
4542 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply},
4543 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
4544 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
4545 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
4546 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02004547 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Philipp Reisnerd612d302010-12-27 10:53:28 +01004548 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply},
Philipp Reisnerb411b362009-09-25 16:07:19 -07004549 [P_MAX_CMD] = { 0, NULL },
4550 };
4551 if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL)
4552 return NULL;
4553 return &asender_tbl[cmd];
4554}
4555
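/* idr_for_each() callbacks the asender uses to process and to check the
 * done_ee lists of all volumes on this connection. */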
Philipp Reisner32862ec2011-02-08 16:41:01 +01004556static int _drbd_process_done_ee(int vnr, void *p, void *data)
4557{
4558 struct drbd_conf *mdev = (struct drbd_conf *)p;
4559 return !drbd_process_done_ee(mdev);
4560}
4561
4562static int _check_ee_empty(int vnr, void *p, void *data)
4563{
4564 struct drbd_conf *mdev = (struct drbd_conf *)p;
4565 struct drbd_tconn *tconn = mdev->tconn;
4566 int not_empty;
4567
4568 spin_lock_irq(&tconn->req_lock);
4569 not_empty = !list_empty(&mdev->done_ee);
4570 spin_unlock_irq(&tconn->req_lock);
4571
4572 return not_empty;
4573}
4574
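/* Process the done_ee lists of all volumes and repeat until they are seen
 * empty again after SIGNAL_ASENDER has been re-armed, so no completion
 * queued in between is missed. */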
4575static int tconn_process_done_ee(struct drbd_tconn *tconn)
4576{
4577 int not_empty, err;
4578
4579 do {
4580 clear_bit(SIGNAL_ASENDER, &tconn->flags);
4581 flush_signals(current);
4582 err = idr_for_each(&tconn->volumes, _drbd_process_done_ee, NULL);
4583 if (err)
4584 return err;
4585 set_bit(SIGNAL_ASENDER, &tconn->flags);
4586 not_empty = idr_for_each(&tconn->volumes, _check_ee_empty, NULL);
4587 } while (not_empty);
4588
4589 return 0;
4590}
4591
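/* The asender ("acknowledge sender") thread: sends pings on request,
 * processes done_ee so ACKs go out, and receives and dispatches packets on
 * the meta socket via get_asender_cmd().  A receive error reconnects
 * (C_NETWORK_FAILURE); an unknown command disconnects (C_DISCONNECTING). */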
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592int drbd_asender(struct drbd_thread *thi)
4593{
Philipp Reisner392c8802011-02-09 10:33:31 +01004594 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisner32862ec2011-02-08 16:41:01 +01004595 struct p_header *h = &tconn->meta.rbuf.header;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004596 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004597 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004598 int rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004599 void *buf = h;
4600 int received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004601 int expect = sizeof(struct p_header);
Lars Ellenbergf36af182011-03-09 22:44:55 +01004602 int ping_timeout_active = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004603
Philipp Reisnerb411b362009-09-25 16:07:19 -07004604 current->policy = SCHED_RR; /* Make this a realtime task! */
4605 current->rt_priority = 2; /* more important than all other tasks */
4606
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01004607 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01004608 drbd_thread_current_set_cpu(thi);
Philipp Reisner32862ec2011-02-08 16:41:01 +01004609 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
4610 if (!drbd_send_ping(tconn->volume0)) {
4611 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01004612 goto reconnect;
4613 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004614 tconn->meta.socket->sk->sk_rcvtimeo =
4615 tconn->net_conf->ping_timeo*HZ/10;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004616 ping_timeout_active = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004617 }
4618
Philipp Reisner32862ec2011-02-08 16:41:01 +01004619 /* TODO: conditionally cork; it may hurt latency if we cork without
4620 much to send */
4621 if (!tconn->net_conf->no_cork)
4622 drbd_tcp_cork(tconn->meta.socket);
4623 if (tconn_process_done_ee(tconn))
4624 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625 /* but unconditionally uncork unless disabled */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004626 if (!tconn->net_conf->no_cork)
4627 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628
4629 /* short circuit, recv_msg would return EINTR anyways. */
4630 if (signal_pending(current))
4631 continue;
4632
Philipp Reisner32862ec2011-02-08 16:41:01 +01004633 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
4634 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635
4636 flush_signals(current);
4637
4638 /* Note:
4639 * -EINTR (on meta) we got a signal
4640 * -EAGAIN (on meta) rcvtimeo expired
4641 * -ECONNRESET other side closed the connection
4642 * -ERESTARTSYS (on data) we got a signal
4643 * rv < 0 other than above: unexpected error!
4644 * rv == expected: full header or command
4645 * rv < expected: "woken" by signal during receive
4646 * rv == 0 : "connection shut down by peer"
4647 */
4648 if (likely(rv > 0)) {
4649 received += rv;
4650 buf += rv;
4651 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004652 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653 goto reconnect;
4654 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004655 /* If the data socket received something meanwhile,
4656 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01004657 if (time_after(tconn->last_received,
4658 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02004659 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01004660 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004661 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 goto reconnect;
4663 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004664 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004665 continue;
4666 } else if (rv == -EINTR) {
4667 continue;
4668 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004669 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004670 goto reconnect;
4671 }
4672
4673 if (received == expect && cmd == NULL) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004674 if (!decode_header(tconn, h, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004675 goto reconnect;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004676 cmd = get_asender_cmd(pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004677 if (unlikely(cmd == NULL)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004678 conn_err(tconn, "unknown command %d on meta (l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004679 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004680 goto disconnect;
4681 }
4682 expect = cmd->pkt_size;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004683 if (pi.size != expect - sizeof(struct p_header)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004684 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004685 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004687 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688 }
4689 if (received == expect) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01004690 tconn->last_received = jiffies;
4691 if (!cmd->process(vnr_to_mdev(tconn, pi.vnr), pi.cmd))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692 goto reconnect;
4693
Lars Ellenbergf36af182011-03-09 22:44:55 +01004694 /* the idle_timeout (ping-int)
4695 * has been restored in got_PingAck() */
4696 if (cmd == get_asender_cmd(P_PING_ACK))
4697 ping_timeout_active = 0;
4698
Philipp Reisnerb411b362009-09-25 16:07:19 -07004699 buf = h;
4700 received = 0;
Philipp Reisner257d0af2011-01-26 12:15:29 +01004701 expect = sizeof(struct p_header);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702 cmd = NULL;
4703 }
4704 }
4705
4706 if (0) {
4707reconnect:
Philipp Reisner32862ec2011-02-08 16:41:01 +01004708 drbd_force_state(tconn->volume0, NS(conn, C_NETWORK_FAILURE));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709 }
4710 if (0) {
4711disconnect:
Philipp Reisner32862ec2011-02-08 16:41:01 +01004712 drbd_force_state(tconn->volume0, NS(conn, C_DISCONNECTING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004713 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01004714 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004715
Philipp Reisner32862ec2011-02-08 16:41:01 +01004716 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717
4718 return 0;
4719}