/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

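/* Drop a reference on every page in the chain and return the number of
 * pages that were put. */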
static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

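/* Prepend the chain chain_first..chain_last to *head.  The caller must
 * serialize access to *head (in this file, via drbd_pp_lock). */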
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

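/* Try to get @number pages: first from the global drbd_pp_pool, otherwise
 * freshly allocated with GFP_TRY.  Returns the page chain, or NULL if not
 * all pages are available right now; a partial allocation is handed back
 * to the pool. */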
static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that has not finished,
	   we can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

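/* Under req_lock, move finished entries off mdev->net_ee, then free them
 * outside the lock. */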
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
			      bool retry)
{
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&mdev->pp_in_use) < mxb)
		page = __drbd_alloc_pages(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(mdev, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				    struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

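/* Receive @size bytes from @sock into @buf.  If no @flags are given, this
 * blocks until everything has arrived (MSG_WAITALL | MSG_NOSIGNAL).
 * Returns the number of bytes received, or a negative error code. */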
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(tconn->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			conn_info(tconn, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		conn_info(tconn, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

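/* Receive exactly @size bytes: returns 0 on success, -EIO on a short read,
 * or the negative error code from drbd_recv(). */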
static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv(tconn, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(tconn, buf, size);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

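/* Actively connect to the peer: bind to the configured local address (with
 * port 0, so the kernel picks a free source port) and connect to the
 * configured peer address.  Returns the connected socket, or NULL. */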
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &tconn->my_addr, my_addr_len);

	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_tconn *tconn;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &tconn->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

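/* Passive side: wait up to connect_int (plus some random jitter) for the
 * listen socket's state_change callback to signal an incoming connection,
 * then accept it.  Returns the established socket, or NULL. */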
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "accept failed, err = %d\n", err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_tconn *, void *, struct packet_info *);

static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
}

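/* Read and decode the very first packet on a freshly established socket;
 * returns the packet command (P_INITIAL_DATA or P_INITIAL_META is expected)
 * or a negative error code. */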
static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(tconn);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(tconn, tconn->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_conf *mdev)
{
	int err;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_current_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	atomic_set(&mdev->ap_in_flight, 0);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_tconn *tconn)
{
	struct drbd_socket sock, msock;
	struct drbd_conf *mdev;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.tconn = tconn,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &tconn->flags);
	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = tconn->data.sbuf;
	sock.rbuf = tconn->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = tconn->meta.sbuf;
	msock.rbuf = tconn->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	tconn->agreed_pro_version = 80;

	if (prepare_listen_socket(tconn, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(tconn);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(tconn, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
				msock.socket = s;
				send_first_packet(tconn, &msock, P_INITIAL_META);
			} else {
				conn_err(tconn, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(tconn->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn, &ad);
		if (s) {
			int fp = receive_first_packet(tconn, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &tconn->flags);
				if (msock.socket) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	tconn->data.socket = sock.socket;
	tconn->meta.socket = msock.socket;
	tconn->last_received = jiffies;

	h = drbd_do_features(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	tconn->data.socket->sk->sk_sndtimeo = timeout;
	tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	set_bit(STATE_SENT, &tconn->flags);

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		kref_get(&mdev->kref);
		rcu_read_unlock();

		/* Prevent a race between resync-handshake and
		 * being promoted to Primary.
		 *
		 * Grab and release the state mutex, so we know that any current
		 * drbd_set_role() is finished, and any incoming drbd_set_role
		 * will see the STATE_SENT flag, and wait for it to be cleared.
		 */
		mutex_lock(mdev->state_mutex);
		mutex_unlock(mdev->state_mutex);

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &mdev->flags);
		else
			clear_bit(DISCARD_MY_DATA, &mdev->flags);

		drbd_connected(mdev);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &tconn->flags);
		return 0;
	}

	drbd_thread_start(&tconn->asender);

	mutex_lock(&tconn->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	tconn->net_conf->discard_my_data = 0;
	mutex_unlock(&tconn->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

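/* Decode a received packet header into @pi.  The header format
 * (p_header100, p_header95 or p_header80) is selected by the agreed
 * protocol version via drbd_header_size() plus the magic; pi->data is
 * left pointing just behind the header in the receive buffer. */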
static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(tconn);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			conn_err(tconn, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 tconn->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

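/* Receive one packet header from the data socket and decode it into @pi;
 * also updates tconn->last_received. */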
static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	void *buffer = tconn->data.rbuf;
	int err;

	err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
	if (err)
		return err;

	err = decode_header(tconn, buffer, pi);
	tconn->last_received = jiffies;

	return err;
}

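/* If the current write ordering policy requires it (>= WO_bdev_flush),
 * flush the backing device of every volume of this connection; on a flush
 * failure, degrade the write ordering to WO_drain_io and stop. */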
static void drbd_flush(struct drbd_tconn *tconn)
{
	int rv;
	struct drbd_conf *mdev;
	int vnr;

	if (tconn->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&tconn->volumes, mdev, vnr) {
			if (!get_ldev(mdev))
				continue;
			kref_get(&mdev->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				dev_info(DEV, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(tconn, WO_drain_io);
			}
			put_ldev(mdev);
			kref_put(&mdev->kref, &drbd_minor_destroy);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}

1179/**
 1180 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
 1181 * @tconn: DRBD connection.
1182 * @epoch: Epoch object.
1183 * @ev: Epoch event.
1184 */
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001185static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001186 struct drbd_epoch *epoch,
1187 enum epoch_event ev)
1188{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001189 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 enum finish_epoch rv = FE_STILL_LIVE;
1192
Philipp Reisner12038a32011-11-09 19:18:00 +01001193 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194 do {
1195 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196
1197 epoch_size = atomic_read(&epoch->epoch_size);
1198
1199 switch (ev & ~EV_CLEANUP) {
1200 case EV_PUT:
1201 atomic_dec(&epoch->active);
1202 break;
1203 case EV_GOT_BARRIER_NR:
1204 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 break;
1206 case EV_BECAME_LAST:
 1207 /* nothing to do */
1208 break;
1209 }
1210
Philipp Reisnerb411b362009-09-25 16:07:19 -07001211 if (epoch_size != 0 &&
1212 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001213 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214 if (!(ev & EV_CLEANUP)) {
Philipp Reisner12038a32011-11-09 19:18:00 +01001215 spin_unlock(&tconn->epoch_lock);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001216 drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
Philipp Reisner12038a32011-11-09 19:18:00 +01001217 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001219#if 0
1220 /* FIXME: dec unacked on connection, once we have
1221 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001222 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001223 dec_unacked(epoch->tconn);
1224#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001225
Philipp Reisner12038a32011-11-09 19:18:00 +01001226 if (tconn->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001227 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1228 list_del(&epoch->list);
1229 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Philipp Reisner12038a32011-11-09 19:18:00 +01001230 tconn->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231 kfree(epoch);
1232
1233 if (rv == FE_STILL_LIVE)
1234 rv = FE_DESTROYED;
1235 } else {
1236 epoch->flags = 0;
1237 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001238 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001239 if (rv == FE_STILL_LIVE)
1240 rv = FE_RECYCLED;
1241 }
1242 }
1243
1244 if (!next_epoch)
1245 break;
1246
1247 epoch = next_epoch;
1248 } while (1);
1249
Philipp Reisner12038a32011-11-09 19:18:00 +01001250 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001251
Philipp Reisnerb411b362009-09-25 16:07:19 -07001252 return rv;
1253}
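/*
 * In short, the loop above considers an epoch finished once it has received
 * at least one write (epoch_size != 0), no write belonging to it is still in
 * flight (active == 0), and either its barrier number has arrived
 * (DE_HAVE_BARRIER_NUMBER) or we are cleaning up anyway (EV_CLEANUP).
 * As a sketch: a P_BARRIER arriving for an epoch with epoch_size == 3 and
 * active == 1 only sets DE_HAVE_BARRIER_NUMBER and returns FE_STILL_LIVE;
 * the EV_PUT of the last in-flight write then drops active to 0, the
 * P_BARRIER_ACK is sent, and the epoch is either destroyed or recycled
 * depending on whether it is still tconn->current_epoch.
 */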
1254
1255/**
 1256 * drbd_bump_write_ordering() - Fall back to another write ordering method
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001257 * @tconn: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258 * @wo: Write ordering method to try.
1259 */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001260void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001261{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001262 struct disk_conf *dc;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001263 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001265 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266 static char *write_ordering_str[] = {
1267 [WO_none] = "none",
1268 [WO_drain_io] = "drain",
1269 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270 };
1271
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001272 pwo = tconn->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001273 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001274 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001275 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner27eb13e2012-03-30 14:12:15 +02001276 if (!get_ldev_if_state(mdev, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001277 continue;
1278 dc = rcu_dereference(mdev->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001279
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001280 if (wo == WO_bdev_flush && !dc->disk_flushes)
1281 wo = WO_drain_io;
1282 if (wo == WO_drain_io && !dc->disk_drain)
1283 wo = WO_none;
1284 put_ldev(mdev);
1285 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001286 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001287 tconn->write_ordering = wo;
1288 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1289 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290}
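/*
 * Note that wo = min(pwo, wo) means this can only ever lower the ordering
 * method, and the per-volume disk options may lower it further.  Example:
 * starting from WO_bdev_flush, a volume whose disk_conf has disk_flushes
 * cleared degrades the method to WO_drain_io, and if that volume also has
 * disk_drain cleared it ends up at WO_none for the whole connection.
 */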
1291
1292/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001293 * drbd_submit_peer_request() - submit a peer request to the local backing device
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001294 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001295 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001296 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001297 *
1298 * May spread the pages to multiple bios,
1299 * depending on bio_add_page restrictions.
1300 *
1301 * Returns 0 if all bios have been submitted,
1302 * -ENOMEM if we could not allocate enough bios,
1303 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1304 * single page to an empty bio (which should never happen and likely indicates
1305 * that the lower level IO stack is in some way broken). This has been observed
1306 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001307 */
1308/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001309int drbd_submit_peer_request(struct drbd_conf *mdev,
1310 struct drbd_peer_request *peer_req,
1311 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001312{
1313 struct bio *bios = NULL;
1314 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001315 struct page *page = peer_req->pages;
1316 sector_t sector = peer_req->i.sector;
1317 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001318 unsigned n_bios = 0;
1319 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001320 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001321
1322 /* In most cases, we will only need one bio. But in case the lower
1323 * level restrictions happen to be different at this offset on this
1324 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001325 * request in more than one bio.
1326 *
1327 * Plain bio_alloc is good enough here, this is no DRBD internally
1328 * generated bio, but a bio allocated on behalf of the peer.
1329 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001330next_bio:
1331 bio = bio_alloc(GFP_NOIO, nr_pages);
1332 if (!bio) {
1333 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1334 goto fail;
1335 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001336 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001337 bio->bi_iter.bi_sector = sector;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001338 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001339 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001340 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001341 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001342
1343 bio->bi_next = bios;
1344 bios = bio;
1345 ++n_bios;
1346
1347 page_chain_for_each(page) {
1348 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1349 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001350 /* A single page must always be possible!
1351 * But in case it fails anyways,
1352 * we deal with it, and complain (below). */
1353 if (bio->bi_vcnt == 0) {
1354 dev_err(DEV,
1355 "bio_add_page failed for len=%u, "
1356 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001357 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001358 err = -ENOSPC;
1359 goto fail;
1360 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001361 goto next_bio;
1362 }
1363 ds -= len;
1364 sector += len >> 9;
1365 --nr_pages;
1366 }
1367 D_ASSERT(page == NULL);
1368 D_ASSERT(ds == 0);
1369
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001370 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001371 do {
1372 bio = bios;
1373 bios = bios->bi_next;
1374 bio->bi_next = NULL;
1375
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001376 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001377 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001378 return 0;
1379
1380fail:
1381 while (bios) {
1382 bio = bios;
1383 bios = bios->bi_next;
1384 bio_put(bio);
1385 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001386 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001387}
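/*
 * Sizing sketch: a 12 KiB peer request on a system with 4 KiB pages gives
 * nr_pages = (12288 + 4095) >> PAGE_SHIFT = 3.  Typically all three pages
 * are added to the single bio allocated first; only when bio_add_page()
 * refuses a page (lower-level limits stricter on this side than on the
 * sender) does the next_bio label allocate a further bio, and pending_bios
 * then counts every bio submitted for this one peer request.
 */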
1388
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001389static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001390 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001391{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001392 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001393
1394 drbd_remove_interval(&mdev->write_requests, i);
1395 drbd_clear_interval(i);
1396
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001397 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001398 if (i->waiting)
1399 wake_up(&mdev->misc_wait);
1400}
1401
Rashika Kheriaf63e6312013-12-19 15:11:09 +05301402static void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
Philipp Reisner77fede52011-11-10 21:19:11 +01001403{
1404 struct drbd_conf *mdev;
1405 int vnr;
1406
1407 rcu_read_lock();
1408 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1409 kref_get(&mdev->kref);
1410 rcu_read_unlock();
1411 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1412 kref_put(&mdev->kref, &drbd_minor_destroy);
1413 rcu_read_lock();
1414 }
1415 rcu_read_unlock();
1416}
1417
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001418static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001419{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001420 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001421 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001422 struct drbd_epoch *epoch;
1423
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001424 /* FIXME these are unacked on connection,
1425 * not a specific (peer)device.
1426 */
Philipp Reisner12038a32011-11-09 19:18:00 +01001427 tconn->current_epoch->barrier_nr = p->barrier;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001428 tconn->current_epoch->tconn = tconn;
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001429 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001430
1431 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1432 * the activity log, which means it would not be resynced in case the
1433 * R_PRIMARY crashes now.
1434 * Therefore we must send the barrier_ack after the barrier request was
1435 * completed. */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001436 switch (tconn->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437 case WO_none:
1438 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001439 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001440
1441 /* receiver context, in the writeout path of the other node.
1442 * avoid potential distributed deadlock */
1443 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1444 if (epoch)
1445 break;
1446 else
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001447 conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001448 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449
1450 case WO_bdev_flush:
1451 case WO_drain_io:
Philipp Reisner77fede52011-11-10 21:19:11 +01001452 conn_wait_active_ee_empty(tconn);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001453 drbd_flush(tconn);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001454
Philipp Reisner12038a32011-11-09 19:18:00 +01001455 if (atomic_read(&tconn->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001456 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1457 if (epoch)
1458 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001459 }
1460
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001461 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001462 default:
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001463 conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001464 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465 }
1466
1467 epoch->flags = 0;
1468 atomic_set(&epoch->epoch_size, 0);
1469 atomic_set(&epoch->active, 0);
1470
Philipp Reisner12038a32011-11-09 19:18:00 +01001471 spin_lock(&tconn->epoch_lock);
1472 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1473 list_add(&epoch->list, &tconn->current_epoch->list);
1474 tconn->current_epoch = epoch;
1475 tconn->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476 } else {
1477 /* The current_epoch got recycled while we allocated this one... */
1478 kfree(epoch);
1479 }
Philipp Reisner12038a32011-11-09 19:18:00 +01001480 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001481
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001482 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483}
1484
1485/* used from receive_RSDataReply (recv_resync_read)
1486 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001487static struct drbd_peer_request *
1488read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1489 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490{
Lars Ellenberg66660322010-04-06 12:15:04 +02001491 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001492 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001493 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001494 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001495 void *dig_in = mdev->tconn->int_dig_in;
1496 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001497 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001498
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001499 dgs = 0;
1500 if (mdev->tconn->peer_integrity_tfm) {
1501 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001502 /*
1503 * FIXME: Receive the incoming digest into the receive buffer
1504 * here, together with its struct p_data?
1505 */
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001506 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1507 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001508 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001509 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001510 }
1511
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001512 if (!expect(IS_ALIGNED(data_size, 512)))
1513 return NULL;
1514 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1515 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516
Lars Ellenberg66660322010-04-06 12:15:04 +02001517 /* even though we trust our peer,
1518 * we sometimes have to double check. */
1519 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001520 dev_err(DEV, "request from peer beyond end of local disk: "
1521 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001522 (unsigned long long)capacity,
1523 (unsigned long long)sector, data_size);
1524 return NULL;
1525 }
1526
Philipp Reisnerb411b362009-09-25 16:07:19 -07001527 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1528 * "criss-cross" setup, that might cause write-out on some other DRBD,
1529 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02001530 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001531 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001533
Lars Ellenberga73ff322012-06-25 19:15:38 +02001534 if (!data_size)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001535 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001536
Philipp Reisnerb411b362009-09-25 16:07:19 -07001537 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001538 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001539 page_chain_for_each(page) {
1540 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001541 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001542 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001543 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001544 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1545 data[0] = data[0] ^ (unsigned long)-1;
1546 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001548 if (err) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001549 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001550 return NULL;
1551 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001552 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001553 }
1554
1555 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001556 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001557 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001558 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1559 (unsigned long long)sector, data_size);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001560 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001561 return NULL;
1562 }
1563 }
1564 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001565 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566}
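/*
 * Wire layout handled above: when a peer data integrity algorithm is
 * configured, every data packet carries its digest first, so a payload
 * advertised as data_size bytes is really dgs digest bytes followed by
 * (data_size - dgs) bytes of block data.  With a 16-byte digest, for
 * example, a 4 KiB write arrives as 16 + 4096 bytes; the digest recomputed
 * over the received pages must match byte for byte, otherwise the peer
 * request is freed and NULL is returned, which the callers turn into a
 * protocol error and a re-connect.
 */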
1567
1568/* drbd_drain_block() just takes a data block
1569 * out of the socket input buffer, and discards it.
1570 */
1571static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1572{
1573 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001574 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001575 void *data;
1576
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001577 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001578 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001579
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001580 page = drbd_alloc_pages(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581
1582 data = kmap(page);
1583 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001584 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1585
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001586 err = drbd_recv_all_warn(mdev->tconn, data, len);
1587 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001588 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001589 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001590 }
1591 kunmap(page);
Andreas Gruenbacher5cc287e2011-04-07 21:02:59 +02001592 drbd_free_pages(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001593 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594}
1595
1596static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1597 sector_t sector, int data_size)
1598{
Kent Overstreet79886132013-11-23 17:19:00 -08001599 struct bio_vec bvec;
1600 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001602 int dgs, err, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001603 void *dig_in = mdev->tconn->int_dig_in;
1604 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001606 dgs = 0;
1607 if (mdev->tconn->peer_integrity_tfm) {
1608 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001609 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1610 if (err)
1611 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001612 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613 }
1614
Philipp Reisnerb411b362009-09-25 16:07:19 -07001615 /* optimistically update recv_cnt. if receiving fails below,
1616 * we disconnect anyways, and counters will be reset. */
1617 mdev->recv_cnt += data_size>>9;
1618
1619 bio = req->master_bio;
Kent Overstreet4f024f32013-10-11 15:44:27 -07001620 D_ASSERT(sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621
Kent Overstreet79886132013-11-23 17:19:00 -08001622 bio_for_each_segment(bvec, bio, iter) {
1623 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1624 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001625 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001626 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001627 if (err)
1628 return err;
1629 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 }
1631
1632 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001633 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001634 if (memcmp(dig_in, dig_vv, dgs)) {
1635 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001636 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001637 }
1638 }
1639
1640 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001641 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001642}
1643
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001644/*
1645 * e_end_resync_block() is called in asender context via
1646 * drbd_finish_peer_reqs().
1647 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001648static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001650 struct drbd_peer_request *peer_req =
1651 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001652 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001653 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001654 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001656 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001658 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1659 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001660 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661 } else {
1662 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001663 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001665 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666 }
1667 dec_unacked(mdev);
1668
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001669 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001670}
1671
1672static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1673{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001674 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001676 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1677 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001678 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679
1680 dec_rs_pending(mdev);
1681
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682 inc_unacked(mdev);
1683 /* corresponding dec_unacked() in e_end_resync_block()
1684 * respective _drbd_clear_done_ee */
1685
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001686 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001687
Philipp Reisner87eeee42011-01-19 14:16:30 +01001688 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001689 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001690 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001692 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001693 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001694 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001696 /* don't care for the reason here */
1697 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001698 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001699 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001700 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001701
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001702 drbd_free_peer_req(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001703fail:
1704 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001705 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706}
1707
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001708static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001709find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1710 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001711{
1712 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001713
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001714 /* Request object according to our peer */
1715 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001716 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001717 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001718 if (!missing_ok) {
Andreas Gruenbacher5af172e2011-07-15 09:43:23 +02001719 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001720 (unsigned long)id, (unsigned long long)sector);
1721 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001722 return NULL;
1723}
1724
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001725static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001727 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001728 struct drbd_request *req;
1729 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001730 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001731 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001732
1733 mdev = vnr_to_mdev(tconn, pi->vnr);
1734 if (!mdev)
1735 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736
1737 sector = be64_to_cpu(p->sector);
1738
Philipp Reisner87eeee42011-01-19 14:16:30 +01001739 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001740 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001741 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001742 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001743 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744
Bart Van Assche24c48302011-05-21 18:32:29 +02001745 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746 * special casing it there for the various failure cases.
1747 * still no race with drbd_fail_pending_reads */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001748 err = recv_dless_read(mdev, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001749 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001750 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001751 /* else: nothing. handled from drbd_disconnect...
1752 * I don't think we may complete this just yet
1753 * in case we are "on-disconnect: freeze" */
1754
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001755 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756}
1757
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001758static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001759{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001760 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001761 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001762 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001763 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001764
1765 mdev = vnr_to_mdev(tconn, pi->vnr);
1766 if (!mdev)
1767 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768
1769 sector = be64_to_cpu(p->sector);
1770 D_ASSERT(p->block_id == ID_SYNCER);
1771
1772 if (get_ldev(mdev)) {
1773 /* data is submitted to disk within recv_resync_read.
1774 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001775 * or in drbd_peer_request_endio. */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001776 err = recv_resync_read(mdev, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001777 } else {
1778 if (__ratelimit(&drbd_ratelimit_state))
1779 dev_err(DEV, "Can not write resync data to local disk.\n");
1780
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001781 err = drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001783 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 }
1785
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001786 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001787
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001788 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789}
1790
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001791static void restart_conflicting_writes(struct drbd_conf *mdev,
1792 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001794 struct drbd_interval *i;
1795 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001797 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1798 if (!i->local)
1799 continue;
1800 req = container_of(i, struct drbd_request, i);
1801 if (req->rq_state & RQ_LOCAL_PENDING ||
1802 !(req->rq_state & RQ_POSTPONED))
1803 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001804 /* as it is RQ_POSTPONED, this will cause it to
1805 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001806 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001807 }
1808}
1809
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001810/*
1811 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001812 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001813static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001814{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001815 struct drbd_peer_request *peer_req =
1816 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001817 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001818 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001819 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820
Philipp Reisner303d1442011-04-13 16:24:47 -07001821 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001822 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001823 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1824 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001825 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001827 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001828 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001829 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001831 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001832 /* we expect it to be marked out of sync anyways...
1833 * maybe assert this? */
1834 }
1835 dec_unacked(mdev);
1836 }
1837 /* we delete from the conflict detection hash _after_ we sent out the
1838 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001839 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001840 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001841 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1842 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001843 if (peer_req->flags & EE_RESTART_REQUESTS)
1844 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001845 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001846 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001847 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001848
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001849 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001851 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001852}
1853
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001854static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001856 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001857 struct drbd_peer_request *peer_req =
1858 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001859 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001860
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001861 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001862 dec_unacked(mdev);
1863
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001864 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865}
1866
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001867static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001868{
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001869 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001870}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001871
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001872static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001873{
1874 struct drbd_tconn *tconn = w->mdev->tconn;
1875
1876 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001877 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001878}
1879
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001880static bool seq_greater(u32 a, u32 b)
1881{
1882 /*
1883 * We assume 32-bit wrap-around here.
1884 * For 24-bit wrap-around, we would have to shift:
1885 * a <<= 8; b <<= 8;
1886 */
1887 return (s32)a - (s32)b > 0;
1888}
1889
1890static u32 seq_max(u32 a, u32 b)
1891{
1892 return seq_greater(a, b) ? a : b;
1893}
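/*
 * Worked example of the wrap-around comparison: seq_greater(0x00000001,
 * 0xffffffff) computes (s32)1 - (s32)0xffffffff == 1 - (-1) == 2 > 0, so
 * sequence number 1 is correctly treated as newer than 0xffffffff right
 * after the 32-bit counter wraps, while seq_greater(0xffffffff, 1) yields
 * -2 and is therefore false.
 */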
1894
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001895static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001896{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001897 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001898
Philipp Reisnerb874d232013-10-23 10:59:16 +02001899 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001900 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001901 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1902 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001903 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001904 /* wake up only if we actually changed mdev->peer_seq */
1905 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001906 wake_up(&mdev->seq_wait);
1907 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001908}
1909
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001910static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1911{
1912 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1913}
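/* Example: overlaps(0, 4096, 4, 4096) is true, since the intervals cover
 * sectors [0, 8) and [4, 12); with the second interval starting at sector 8
 * they would merely touch and the result is false. */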
1914
1915/* maybe change sync_ee into interval trees as well? */
Philipp Reisner3ea35df2012-04-06 12:13:18 +02001916static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001917{
1918 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001919 bool rv = 0;
1920
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001921 spin_lock_irq(&mdev->tconn->req_lock);
1922 list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1923 if (overlaps(peer_req->i.sector, peer_req->i.size,
1924 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001925 rv = 1;
1926 break;
1927 }
1928 }
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001929 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001930
1931 return rv;
1932}
1933
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934/* Called from receive_Data.
1935 * Synchronize packets on sock with packets on msock.
1936 *
1937 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1938 * packet traveling on msock, they are still processed in the order they have
1939 * been sent.
1940 *
1941 * Note: we don't care for Ack packets overtaking P_DATA packets.
1942 *
 1943 * In case peer_seq is larger than mdev->peer_seq, there are
1944 * outstanding packets on the msock. We wait for them to arrive.
1945 * In case we are the logically next packet, we update mdev->peer_seq
1946 * ourselves. Correctly handles 32bit wrap around.
1947 *
1948 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1949 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1950 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 1951 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1952 *
1953 * returns 0 if we may process the packet,
1954 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001955static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001956{
1957 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001958 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001959 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001960
Philipp Reisnerb874d232013-10-23 10:59:16 +02001961 if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001962 return 0;
1963
Philipp Reisnerb411b362009-09-25 16:07:19 -07001964 spin_lock(&mdev->peer_seq_lock);
1965 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001966 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1967 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001968 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001969 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001970
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971 if (signal_pending(current)) {
1972 ret = -ERESTARTSYS;
1973 break;
1974 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001975
1976 rcu_read_lock();
1977 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1978 rcu_read_unlock();
1979
1980 if (!tp)
1981 break;
1982
1983 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001984 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001985 spin_unlock(&mdev->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001986 rcu_read_lock();
1987 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1988 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001989 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001990 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001991 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001992 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001993 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001994 break;
1995 }
1996 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001997 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001998 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001999 return ret;
2000}
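/*
 * Example of the admission rule above: with mdev->peer_seq == 41, a data
 * packet carrying peer_seq 42 satisfies !seq_greater(peer_seq - 1,
 * mdev->peer_seq) and is processed right away (peer_seq is bumped to 42).
 * A packet carrying peer_seq 43 means something that advances peer_seq to
 * 42 is still outstanding on the other socket, so with two_primaries
 * enabled we sleep on seq_wait until mdev->peer_seq catches up or the ping
 * timeout expires.
 */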
2001
Lars Ellenberg688593c2010-11-17 22:25:03 +01002002/* see also bio_flags_to_wire()
2003 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2004 * flags and back. We may replicate to other kernel versions. */
2005static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002006{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002007 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2008 (dpf & DP_FUA ? REQ_FUA : 0) |
2009 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2010 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002011}
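/*
 * For instance, a peer write sent with DP_FUA | DP_FLUSH is resubmitted
 * locally as REQ_FUA | REQ_FLUSH; keeping the wire flags separate from the
 * kernel's bio flags lets the two nodes run different kernel versions with
 * different bio flag encodings.
 */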
2012
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002013static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
2014 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002015{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002016 struct drbd_interval *i;
2017
2018 repeat:
2019 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2020 struct drbd_request *req;
2021 struct bio_and_error m;
2022
2023 if (!i->local)
2024 continue;
2025 req = container_of(i, struct drbd_request, i);
2026 if (!(req->rq_state & RQ_POSTPONED))
2027 continue;
2028 req->rq_state &= ~RQ_POSTPONED;
2029 __req_mod(req, NEG_ACKED, &m);
2030 spin_unlock_irq(&mdev->tconn->req_lock);
2031 if (m.bio)
2032 complete_master_bio(mdev, &m);
2033 spin_lock_irq(&mdev->tconn->req_lock);
2034 goto repeat;
2035 }
2036}
2037
2038static int handle_write_conflicts(struct drbd_conf *mdev,
2039 struct drbd_peer_request *peer_req)
2040{
2041 struct drbd_tconn *tconn = mdev->tconn;
Lars Ellenberg427c0432012-08-01 12:43:01 +02002042 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002043 sector_t sector = peer_req->i.sector;
2044 const unsigned int size = peer_req->i.size;
2045 struct drbd_interval *i;
2046 bool equal;
2047 int err;
2048
2049 /*
2050 * Inserting the peer request into the write_requests tree will prevent
2051 * new conflicting local requests from being added.
2052 */
2053 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2054
2055 repeat:
2056 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2057 if (i == &peer_req->i)
2058 continue;
2059
2060 if (!i->local) {
2061 /*
2062 * Our peer has sent a conflicting remote request; this
2063 * should not happen in a two-node setup. Wait for the
2064 * earlier peer request to complete.
2065 */
2066 err = drbd_wait_misc(mdev, i);
2067 if (err)
2068 goto out;
2069 goto repeat;
2070 }
2071
2072 equal = i->sector == sector && i->size == size;
2073 if (resolve_conflicts) {
2074 /*
2075 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002076 * overlapping request, it can be considered overwritten
2077 * and thus superseded; otherwise, it will be retried
2078 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002079 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002080 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002081 (i->size >> 9) >= sector + (size >> 9);
2082
2083 if (!equal)
2084 dev_alert(DEV, "Concurrent writes detected: "
2085 "local=%llus +%u, remote=%llus +%u, "
2086 "assuming %s came first\n",
2087 (unsigned long long)i->sector, i->size,
2088 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002089 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002090
2091 inc_unacked(mdev);
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002092 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002093 e_send_retry_write;
2094 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2095 wake_asender(mdev->tconn);
2096
2097 err = -ENOENT;
2098 goto out;
2099 } else {
2100 struct drbd_request *req =
2101 container_of(i, struct drbd_request, i);
2102
2103 if (!equal)
2104 dev_alert(DEV, "Concurrent writes detected: "
2105 "local=%llus +%u, remote=%llus +%u\n",
2106 (unsigned long long)i->sector, i->size,
2107 (unsigned long long)sector, size);
2108
2109 if (req->rq_state & RQ_LOCAL_PENDING ||
2110 !(req->rq_state & RQ_POSTPONED)) {
2111 /*
2112 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002113 * decide if this request has been superseded
2114 * or needs to be retried.
2115 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002116 * disappear from the write_requests tree.
2117 *
2118 * In addition, wait for the conflicting
2119 * request to finish locally before submitting
2120 * the conflicting peer request.
2121 */
2122 err = drbd_wait_misc(mdev, &req->i);
2123 if (err) {
2124 _conn_request_state(mdev->tconn,
2125 NS(conn, C_TIMEOUT),
2126 CS_HARD);
2127 fail_postponed_requests(mdev, sector, size);
2128 goto out;
2129 }
2130 goto repeat;
2131 }
2132 /*
2133 * Remember to restart the conflicting requests after
2134 * the new peer request has completed.
2135 */
2136 peer_req->flags |= EE_RESTART_REQUESTS;
2137 }
2138 }
2139 err = 0;
2140
2141 out:
2142 if (err)
2143 drbd_remove_epoch_entry_interval(mdev, peer_req);
2144 return err;
2145}
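/*
 * Decision summary for the conflict handling above:
 *  - we hold RESOLVE_CONFLICTS and a local request fully covers the peer
 *    request: queue e_send_superseded, i.e. answer P_SUPERSEDED and do not
 *    submit the peer write;
 *  - we hold RESOLVE_CONFLICTS but the overlap is only partial: queue
 *    e_send_retry_write, i.e. P_RETRY_WRITE on protocol 100 and later
 *    (P_SUPERSEDED on older protocols);
 *  - the peer holds RESOLVE_CONFLICTS: wait for the conflicting local
 *    request to finish locally, or, if it is already postponed, flag the
 *    peer request EE_RESTART_REQUESTS so the postponed writes get restarted
 *    once this write has completed.
 */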
2146
Philipp Reisnerb411b362009-09-25 16:07:19 -07002147/* mirrored write */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002148static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002149{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002150 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002152 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002153 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002154 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002155 int rw = WRITE;
2156 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002157 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002158
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002159 mdev = vnr_to_mdev(tconn, pi->vnr);
2160 if (!mdev)
2161 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002162
Philipp Reisnerb411b362009-09-25 16:07:19 -07002163 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002164 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002165
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002166 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002167 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisner12038a32011-11-09 19:18:00 +01002168 atomic_inc(&tconn->current_epoch->epoch_size);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002169 err2 = drbd_drain_block(mdev, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002170 if (!err)
2171 err = err2;
2172 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002173 }
2174
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002175 /*
2176 * Corresponding put_ldev done either below (on various errors), or in
2177 * drbd_peer_request_endio, if we successfully submit the data at the
2178 * end of this function.
2179 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002180
2181 sector = be64_to_cpu(p->sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002182 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002183 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002184 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002185 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002186 }
2187
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002188 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189
Lars Ellenberg688593c2010-11-17 22:25:03 +01002190 dp_flags = be32_to_cpu(p->dp_flags);
2191 rw |= wire_flags_to_bio(mdev, dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002192 if (peer_req->pages == NULL) {
2193 D_ASSERT(peer_req->i.size == 0);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002194 D_ASSERT(dp_flags & DP_FLUSH);
2195 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002196
2197 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002198 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002199
Philipp Reisner12038a32011-11-09 19:18:00 +01002200 spin_lock(&tconn->epoch_lock);
2201 peer_req->epoch = tconn->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002202 atomic_inc(&peer_req->epoch->epoch_size);
2203 atomic_inc(&peer_req->epoch->active);
Philipp Reisner12038a32011-11-09 19:18:00 +01002204 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205
Philipp Reisner302bdea2011-04-21 11:36:49 +02002206 rcu_read_lock();
2207 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
2208 rcu_read_unlock();
2209 if (tp) {
2210 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002211 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2212 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002214 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002215 err = handle_write_conflicts(mdev, peer_req);
2216 if (err) {
2217 spin_unlock_irq(&mdev->tconn->req_lock);
2218 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002219 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002220 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002221 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002222 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002224 } else {
2225 update_peer_seq(mdev, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002226 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002227 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002228 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002229 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002231 if (mdev->state.conn == C_SYNC_TARGET)
Philipp Reisner3ea35df2012-04-06 12:13:18 +02002232 wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002233
Philipp Reisner303d1442011-04-13 16:24:47 -07002234 if (mdev->tconn->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002235 rcu_read_lock();
2236 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002237 case DRBD_PROT_C:
2238 dp_flags |= DP_SEND_WRITE_ACK;
2239 break;
2240 case DRBD_PROT_B:
2241 dp_flags |= DP_SEND_RECEIVE_ACK;
2242 break;
2243 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002244 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002245 }
2246
2247 if (dp_flags & DP_SEND_WRITE_ACK) {
2248 peer_req->flags |= EE_SEND_WRITE_ACK;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002249 inc_unacked(mdev);
2250 /* corresponding dec_unacked() in e_end_block()
2251 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002252 }
2253
2254 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255 /* I really don't like it that the receiver thread
2256 * sends on the msock, but anyway */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002257 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002258 }
2259
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002260 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002261 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002262 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2263 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2264 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg56392d22013-03-19 18:16:48 +01002265 drbd_al_begin_io(mdev, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002266 }
2267
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002268 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2269 if (!err)
2270 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002271
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002272 /* don't care for the reason here */
2273 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002274 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002275 list_del(&peer_req->w.list);
2276 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002277 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002278 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Lars Ellenberg181286a2011-03-31 15:18:56 +02002279 drbd_al_complete_io(mdev, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002280
Philipp Reisnerb411b362009-09-25 16:07:19 -07002281out_interrupted:
Philipp Reisner1e9dd292011-11-10 15:14:53 +01002282 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002283 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002284 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002285 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002286}
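
/*
 * Illustrative sketch, not part of the driver: the wire-protocol to ack-flag
 * mapping handled a few lines above, written out as a standalone helper.
 * The function name is hypothetical; the DP_* and DRBD_PROT_* constants are
 * the ones used in receive_Data().
 */
static inline u32 example_ack_flags_for_protocol(int wire_protocol)
{
	switch (wire_protocol) {
	case DRBD_PROT_C:	/* ack only after the write reached stable storage */
		return DP_SEND_WRITE_ACK;
	case DRBD_PROT_B:	/* ack as soon as the data has been received */
		return DP_SEND_RECEIVE_ACK;
	default:		/* protocol A: fire and forget, no ack at all */
		return 0;
	}
}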
2287
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002288/* We may throttle resync, if the lower device seems to be busy,
2289 * and current sync rate is above c_min_rate.
2290 *
2291 * To decide whether or not the lower device is busy, we use a scheme similar
2292 * to MD RAID's is_mddev_idle(): if the partition stats reveal "significant"
2293 * (more than 64 sectors) of activity we cannot account for with our own resync
2294 * activity, it obviously is "busy".
2295 *
2296 * The sync rate used here considers only the most recent two step marks,
2297 * to have a short time average so we can react faster.
2298 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002299int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002300{
2301 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2302 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002303 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002304 int curr_events;
2305 int throttle = 0;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002306 unsigned int c_min_rate;
2307
2308 rcu_read_lock();
2309 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2310 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002311
2312 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002313 if (c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002314 return 0;
2315
Philipp Reisnere3555d82010-11-07 15:56:29 +01002316 spin_lock_irq(&mdev->al_lock);
2317 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2318 if (tmp) {
2319 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2320 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2321 spin_unlock_irq(&mdev->al_lock);
2322 return 0;
2323 }
2324 /* Do not slow down if app IO is already waiting for this extent */
2325 }
2326 spin_unlock_irq(&mdev->al_lock);
2327
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002328 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2329 (int)part_stat_read(&disk->part0, sectors[1]) -
2330 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002331
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002332 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2333 unsigned long rs_left;
2334 int i;
2335
2336 mdev->rs_last_events = curr_events;
2337
2338 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2339 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002340 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2341
2342 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2343 rs_left = mdev->ov_left;
2344 else
2345 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002346
2347 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2348 if (!dt)
2349 dt++;
2350 db = mdev->rs_mark_left[i] - rs_left;
2351 dbdt = Bit2KB(db/dt);
2352
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002353 if (dbdt > c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002354 throttle = 1;
2355 }
2356 return throttle;
2357}
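
/*
 * Illustrative sketch, not part of the driver: the arithmetic behind the
 * throttle decision above.  Bit2KB() converts bitmap bits to KiB (one bit
 * covers 4 KiB here); the helper name and parameters are hypothetical.
 */
static inline int example_resync_rate_exceeds_min(unsigned long bits_resynced,
						  unsigned long seconds,
						  unsigned int c_min_rate_kb)
{
	unsigned long dbdt;

	if (!seconds)
		seconds = 1;			/* same guard as "if (!dt) dt++" above */
	dbdt = Bit2KB(bits_resynced / seconds);	/* recent rate in KiB/s */
	return dbdt > c_min_rate_kb;		/* above c-min-rate: throttle */
}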
2358
2359
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002360static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002361{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002362 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002363 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002364 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002365 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002366 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002367 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002369 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002370
2371 mdev = vnr_to_mdev(tconn, pi->vnr);
2372 if (!mdev)
2373 return -EIO;
2374 capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002375
2376 sector = be64_to_cpu(p->sector);
2377 size = be32_to_cpu(p->blksize);
2378
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002379 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002380 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2381 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002382 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002383 }
2384 if (sector + (size>>9) > capacity) {
2385 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2386 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002387 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002388 }
2389
2390 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002391 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002392 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002393 case P_DATA_REQUEST:
2394 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2395 break;
2396 case P_RS_DATA_REQUEST:
2397 case P_CSUM_RS_REQUEST:
2398 case P_OV_REQUEST:
2399 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2400 break;
2401 case P_OV_REPLY:
2402 verb = 0;
2403 dec_rs_pending(mdev);
2404 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2405 break;
2406 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002407 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002408 }
2409 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002410 dev_err(DEV, "Can not satisfy peer's read request, "
2411 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002412
Lars Ellenberga821cc42010-09-06 12:31:37 +02002413 /* drain possibly payload */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002414 return drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002415 }
2416
2417 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2418 * "criss-cross" setup, that might cause write-out on some other DRBD,
2419 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02002420 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002421 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002423 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424 }
2425
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002426 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002427 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002428 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002429 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002430 /* application IO, don't drbd_rs_begin_io */
2431 goto submit;
2432
Philipp Reisnerb411b362009-09-25 16:07:19 -07002433 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002434 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002435 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002436 /* used in the sector offset progress display */
2437 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002438 break;
2439
2440 case P_OV_REPLY:
2441 case P_CSUM_RS_REQUEST:
2442 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002443 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002444 if (!di)
2445 goto out_free_e;
2446
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002447 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002448 di->digest = (((char *)di)+sizeof(struct digest_info));
2449
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002450 peer_req->digest = di;
2451 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002452
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002453 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454 goto out_free_e;
2455
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002456 if (pi->cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002457 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002458 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002459 /* used in the sector offset progress display */
2460 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002461 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002462 /* track progress, we may need to throttle */
2463 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002464 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002465 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002466 /* drbd_rs_begin_io done when we sent this request,
2467 * but accounting still needs to be done. */
2468 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002469 }
2470 break;
2471
2472 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002474 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002475 unsigned long now = jiffies;
2476 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002477 mdev->ov_start_sector = sector;
2478 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002479 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2480 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002481 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2482 mdev->rs_mark_left[i] = mdev->ov_left;
2483 mdev->rs_mark_time[i] = now;
2484 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002485 dev_info(DEV, "Online Verify start sector: %llu\n",
2486 (unsigned long long)sector);
2487 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002488 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002489 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002490 break;
2491
Philipp Reisnerb411b362009-09-25 16:07:19 -07002492 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002493 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002494 }
2495
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002496 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2497 * wrt the receiver, but it is not as straightforward as it may seem.
2498 * Various places in the resync start and stop logic assume resync
2499 * requests are processed in order, requeuing this on the worker thread
2500 * introduces a bunch of new code for synchronization between threads.
2501 *
2502 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2503 * "forever", throttling after drbd_rs_begin_io will lock that extent
2504 * for application writes for the same time. For now, just throttle
2505 * here, where the rest of the code expects the receiver to sleep for
2506 * a while, anyway.
2507 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002508
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002509 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2510 * this defers syncer requests for some time, before letting at least
2511 * one request through. The resync controller on the receiving side
2512 * will adapt to the incoming rate accordingly.
2513 *
2514 * We cannot throttle here if remote is Primary/SyncTarget:
2515 * we would also throttle its application reads.
2516 * In that case, throttling is done on the SyncTarget only.
2517 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002518 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2519 schedule_timeout_uninterruptible(HZ/10);
2520 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002521 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002522
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002523submit_for_resync:
2524 atomic_add(size >> 9, &mdev->rs_sect_ev);
2525
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002526submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002527 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002528 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002529 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002530 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002531
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002532 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002533 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002534
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002535 /* don't care for the reason here */
2536 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002537 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002538 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002539 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002540 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2541
Philipp Reisnerb411b362009-09-25 16:07:19 -07002542out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002543 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002544 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002545 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002546}
2547
2548static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2549{
2550 int self, peer, rv = -100;
2551 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002552 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002553
2554 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2555 peer = mdev->p_uuid[UI_BITMAP] & 1;
2556
2557 ch_peer = mdev->p_uuid[UI_SIZE];
2558 ch_self = mdev->comm_bm_set;
2559
Philipp Reisner44ed1672011-04-19 17:10:19 +02002560 rcu_read_lock();
2561 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2562 rcu_read_unlock();
2563 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002564 case ASB_CONSENSUS:
2565 case ASB_DISCARD_SECONDARY:
2566 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002567 case ASB_VIOLENTLY:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002568 dev_err(DEV, "Configuration error.\n");
2569 break;
2570 case ASB_DISCONNECT:
2571 break;
2572 case ASB_DISCARD_YOUNGER_PRI:
2573 if (self == 0 && peer == 1) {
2574 rv = -1;
2575 break;
2576 }
2577 if (self == 1 && peer == 0) {
2578 rv = 1;
2579 break;
2580 }
2581 /* Else fall through to one of the other strategies... */
2582 case ASB_DISCARD_OLDER_PRI:
2583 if (self == 0 && peer == 1) {
2584 rv = 1;
2585 break;
2586 }
2587 if (self == 1 && peer == 0) {
2588 rv = -1;
2589 break;
2590 }
2591 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002592 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002593 "Using discard-least-changes instead\n");
2594 case ASB_DISCARD_ZERO_CHG:
2595 if (ch_peer == 0 && ch_self == 0) {
Lars Ellenberg427c0432012-08-01 12:43:01 +02002596 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002597 ? -1 : 1;
2598 break;
2599 } else {
2600 if (ch_peer == 0) { rv = 1; break; }
2601 if (ch_self == 0) { rv = -1; break; }
2602 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002603 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002604 break;
2605 case ASB_DISCARD_LEAST_CHG:
2606 if (ch_self < ch_peer)
2607 rv = -1;
2608 else if (ch_self > ch_peer)
2609 rv = 1;
2610 else /* ( ch_self == ch_peer ) */
2611 /* Well, then use something else. */
Lars Ellenberg427c0432012-08-01 12:43:01 +02002612 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002613 ? -1 : 1;
2614 break;
2615 case ASB_DISCARD_LOCAL:
2616 rv = -1;
2617 break;
2618 case ASB_DISCARD_REMOTE:
2619 rv = 1;
2620 }
2621
2622 return rv;
2623}
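
/*
 * Illustrative sketch, not part of the driver: a worked example of the
 * discard-least-changes branch above.  With ch_self = 12 set bits locally
 * and ch_peer = 4096 on the peer, the local node changed less, returns -1
 * and becomes sync target; equal counts fall back to the RESOLVE_CONFLICTS
 * tie breaker.  The helper name is hypothetical.
 */
static inline int example_discard_least_changes(unsigned long ch_self,
						unsigned long ch_peer,
						bool resolve_conflicts)
{
	if (ch_self < ch_peer)
		return -1;	/* we changed less: become sync target */
	if (ch_self > ch_peer)
		return 1;	/* we changed more: become sync source */
	return resolve_conflicts ? -1 : 1;
}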
2624
2625static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2626{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002627 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002628 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002629
Philipp Reisner44ed1672011-04-19 17:10:19 +02002630 rcu_read_lock();
2631 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2632 rcu_read_unlock();
2633 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002634 case ASB_DISCARD_YOUNGER_PRI:
2635 case ASB_DISCARD_OLDER_PRI:
2636 case ASB_DISCARD_LEAST_CHG:
2637 case ASB_DISCARD_LOCAL:
2638 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002639 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002640 dev_err(DEV, "Configuration error.\n");
2641 break;
2642 case ASB_DISCONNECT:
2643 break;
2644 case ASB_CONSENSUS:
2645 hg = drbd_asb_recover_0p(mdev);
2646 if (hg == -1 && mdev->state.role == R_SECONDARY)
2647 rv = hg;
2648 if (hg == 1 && mdev->state.role == R_PRIMARY)
2649 rv = hg;
2650 break;
2651 case ASB_VIOLENTLY:
2652 rv = drbd_asb_recover_0p(mdev);
2653 break;
2654 case ASB_DISCARD_SECONDARY:
2655 return mdev->state.role == R_PRIMARY ? 1 : -1;
2656 case ASB_CALL_HELPER:
2657 hg = drbd_asb_recover_0p(mdev);
2658 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002659 enum drbd_state_rv rv2;
2660
Philipp Reisnerb411b362009-09-25 16:07:19 -07002661 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2662 * we might be here in C_WF_REPORT_PARAMS which is transient.
2663 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002664 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2665 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002666 drbd_khelper(mdev, "pri-lost-after-sb");
2667 } else {
2668 dev_warn(DEV, "Successfully gave up primary role.\n");
2669 rv = hg;
2670 }
2671 } else
2672 rv = hg;
2673 }
2674
2675 return rv;
2676}
2677
2678static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2679{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002680 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002681 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002682
Philipp Reisner44ed1672011-04-19 17:10:19 +02002683 rcu_read_lock();
2684 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2685 rcu_read_unlock();
2686 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002687 case ASB_DISCARD_YOUNGER_PRI:
2688 case ASB_DISCARD_OLDER_PRI:
2689 case ASB_DISCARD_LEAST_CHG:
2690 case ASB_DISCARD_LOCAL:
2691 case ASB_DISCARD_REMOTE:
2692 case ASB_CONSENSUS:
2693 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002694 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002695 dev_err(DEV, "Configuration error.\n");
2696 break;
2697 case ASB_VIOLENTLY:
2698 rv = drbd_asb_recover_0p(mdev);
2699 break;
2700 case ASB_DISCONNECT:
2701 break;
2702 case ASB_CALL_HELPER:
2703 hg = drbd_asb_recover_0p(mdev);
2704 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002705 enum drbd_state_rv rv2;
2706
Philipp Reisnerb411b362009-09-25 16:07:19 -07002707 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2708 * we might be here in C_WF_REPORT_PARAMS which is transient.
2709 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002710 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2711 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002712 drbd_khelper(mdev, "pri-lost-after-sb");
2713 } else {
2714 dev_warn(DEV, "Successfully gave up primary role.\n");
2715 rv = hg;
2716 }
2717 } else
2718 rv = hg;
2719 }
2720
2721 return rv;
2722}
2723
2724static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2725 u64 bits, u64 flags)
2726{
2727 if (!uuid) {
2728 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2729 return;
2730 }
2731 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2732 text,
2733 (unsigned long long)uuid[UI_CURRENT],
2734 (unsigned long long)uuid[UI_BITMAP],
2735 (unsigned long long)uuid[UI_HISTORY_START],
2736 (unsigned long long)uuid[UI_HISTORY_END],
2737 (unsigned long long)bits,
2738 (unsigned long long)flags);
2739}
2740
2741/*
2742 100 after split brain try auto recover
2743 2 C_SYNC_SOURCE set BitMap
2744 1 C_SYNC_SOURCE use BitMap
2745 0 no Sync
2746 -1 C_SYNC_TARGET use BitMap
2747 -2 C_SYNC_TARGET set BitMap
2748 -100 after split brain, disconnect
2749 -1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002750 -1091 requires proto 91
2751 -1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002752 */
2753static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2754{
2755 u64 self, peer;
2756 int i, j;
2757
2758 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2759 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2760
2761 *rule_nr = 10;
2762 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2763 return 0;
2764
2765 *rule_nr = 20;
2766 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2767 peer != UUID_JUST_CREATED)
2768 return -2;
2769
2770 *rule_nr = 30;
2771 if (self != UUID_JUST_CREATED &&
2772 (peer == UUID_JUST_CREATED || peer == (u64)0))
2773 return 2;
2774
2775 if (self == peer) {
2776 int rct, dc; /* roles at crash time */
2777
2778 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2779
Philipp Reisner31890f42011-01-19 14:12:51 +01002780 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002781 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002782
2783 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2784 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2785 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002786 drbd_uuid_move_history(mdev);
2787 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
2788 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789
2790 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2791 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2792 *rule_nr = 34;
2793 } else {
2794 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2795 *rule_nr = 36;
2796 }
2797
2798 return 1;
2799 }
2800
2801 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2802
Philipp Reisner31890f42011-01-19 14:12:51 +01002803 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002804 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805
2806 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2807 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2808 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2809
2810 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2811 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2812 mdev->p_uuid[UI_BITMAP] = 0UL;
2813
2814 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2815 *rule_nr = 35;
2816 } else {
2817 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2818 *rule_nr = 37;
2819 }
2820
2821 return -1;
2822 }
2823
2824 /* Common power [off|failure] */
2825 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2826 (mdev->p_uuid[UI_FLAGS] & 2);
2827 /* lowest bit is set when we were primary,
2828 * next bit (weight 2) is set when peer was primary */
2829 *rule_nr = 40;
2830
2831 switch (rct) {
2832 case 0: /* !self_pri && !peer_pri */ return 0;
2833 case 1: /* self_pri && !peer_pri */ return 1;
2834 case 2: /* !self_pri && peer_pri */ return -1;
2835 case 3: /* self_pri && peer_pri */
Lars Ellenberg427c0432012-08-01 12:43:01 +02002836 dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002837 return dc ? -1 : 1;
2838 }
2839 }
2840
2841 *rule_nr = 50;
2842 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2843 if (self == peer)
2844 return -1;
2845
2846 *rule_nr = 51;
2847 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2848 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002849 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002850 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2851 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2852 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002853 /* The last P_SYNC_UUID did not get through. Undo the modifications the
2854 peer made to its UUIDs when it last started a resync as sync source. */
2855
Philipp Reisner31890f42011-01-19 14:12:51 +01002856 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002857 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002858
2859 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2860 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002861
Lars Ellenberg92b4ca22012-04-30 12:53:52 +02002862 dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
Philipp Reisner4a23f262011-01-11 17:42:17 +01002863 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2864
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865 return -1;
2866 }
2867 }
2868
2869 *rule_nr = 60;
2870 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2871 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2872 peer = mdev->p_uuid[i] & ~((u64)1);
2873 if (self == peer)
2874 return -2;
2875 }
2876
2877 *rule_nr = 70;
2878 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2879 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2880 if (self == peer)
2881 return 1;
2882
2883 *rule_nr = 71;
2884 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2885 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002886 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002887 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2888 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2889 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002890 /* The last P_SYNC_UUID did not get through. Undo the modifications we
2891 made to our own UUIDs when we last started a resync as sync source. */
2892
Philipp Reisner31890f42011-01-19 14:12:51 +01002893 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002894 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002896 __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2897 __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002898
Philipp Reisner4a23f262011-01-11 17:42:17 +01002899 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2901 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2902
2903 return 1;
2904 }
2905 }
2906
2907
2908 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002909 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002910 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2911 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2912 if (self == peer)
2913 return 2;
2914 }
2915
2916 *rule_nr = 90;
2917 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2918 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2919 if (self == peer && self != ((u64)0))
2920 return 100;
2921
2922 *rule_nr = 100;
2923 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2924 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2925 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2926 peer = mdev->p_uuid[j] & ~((u64)1);
2927 if (self == peer)
2928 return -100;
2929 }
2930 }
2931
2932 return -1000;
2933}
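
/*
 * Illustrative sketch, not part of the driver: the most common rules of the
 * table above (40, 50 and 70), reduced to plain values.  Return convention
 * as documented before drbd_uuid_compare(): 1 sync source using the bitmap,
 * -1 sync target using the bitmap, 0 no sync, -1000 unrelated data.  The
 * helper name is hypothetical and the corner cases are left out.
 */
static inline int example_uuid_compare(u64 self_current, u64 self_bitmap,
				       u64 peer_current, u64 peer_bitmap)
{
	self_current &= ~1ULL;	/* mask the "was primary" bit, as above */
	self_bitmap  &= ~1ULL;
	peer_current &= ~1ULL;
	peer_bitmap  &= ~1ULL;

	if (self_current == peer_current)
		return 0;	/* same current generation: nothing to sync */
	if (self_current == peer_bitmap)
		return -1;	/* rule 50: the peer is one generation ahead */
	if (self_bitmap == peer_current)
		return 1;	/* rule 70: we are one generation ahead */
	return -1000;		/* no common ancestor in this simplification */
}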
2934
2935/* drbd_sync_handshake() returns the new conn state on success, or
2936 CONN_MASK (-1) on failure.
2937 */
2938static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2939 enum drbd_disk_state peer_disk) __must_hold(local)
2940{
Philipp Reisnerb411b362009-09-25 16:07:19 -07002941 enum drbd_conns rv = C_MASK;
2942 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002943 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002944 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002945
2946 mydisk = mdev->state.disk;
2947 if (mydisk == D_NEGOTIATING)
2948 mydisk = mdev->new_state_tmp.disk;
2949
2950 dev_info(DEV, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002951
2952 spin_lock_irq(&mdev->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2954 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2955 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2956
2957 hg = drbd_uuid_compare(mdev, &rule_nr);
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002958 spin_unlock_irq(&mdev->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002959
2960 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2961
2962 if (hg == -1000) {
2963 dev_alert(DEV, "Unrelated data, aborting!\n");
2964 return C_MASK;
2965 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002966 if (hg < -1000) {
2967 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002968 return C_MASK;
2969 }
2970
2971 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2972 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2973 int f = (hg == -100) || abs(hg) == 2;
2974 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2975 if (f)
2976 hg = hg*2;
2977 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2978 hg > 0 ? "source" : "target");
2979 }
2980
Adam Gandelman3a11a482010-04-08 16:48:23 -07002981 if (abs(hg) == 100)
2982 drbd_khelper(mdev, "initial-split-brain");
2983
Philipp Reisner44ed1672011-04-19 17:10:19 +02002984 rcu_read_lock();
2985 nc = rcu_dereference(mdev->tconn->net_conf);
2986
2987 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002988 int pcount = (mdev->state.role == R_PRIMARY)
2989 + (peer_role == R_PRIMARY);
2990 int forced = (hg == -100);
2991
2992 switch (pcount) {
2993 case 0:
2994 hg = drbd_asb_recover_0p(mdev);
2995 break;
2996 case 1:
2997 hg = drbd_asb_recover_1p(mdev);
2998 break;
2999 case 2:
3000 hg = drbd_asb_recover_2p(mdev);
3001 break;
3002 }
3003 if (abs(hg) < 100) {
3004 dev_warn(DEV, "Split-Brain detected, %d primaries, "
3005 "automatically solved. Sync from %s node\n",
3006 pcount, (hg < 0) ? "peer" : "this");
3007 if (forced) {
3008 dev_warn(DEV, "Doing a full sync, since"
3009 " UUIDs were ambiguous.\n");
3010 hg = hg*2;
3011 }
3012 }
3013 }
3014
3015 if (hg == -100) {
Philipp Reisner08b165b2011-09-05 16:22:33 +02003016 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017 hg = -1;
Philipp Reisner08b165b2011-09-05 16:22:33 +02003018 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003019 hg = 1;
3020
3021 if (abs(hg) < 100)
3022 dev_warn(DEV, "Split-Brain detected, manually solved. "
3023 "Sync from %s node\n",
3024 (hg < 0) ? "peer" : "this");
3025 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003026 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003027 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003028 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003029
3030 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003031 /* FIXME this log message is not correct if we end up here
3032 * after an attempted attach on a diskless node.
3033 * We just refuse to attach -- well, we drop the "connection"
3034 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07003035 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003036 drbd_khelper(mdev, "split-brain");
3037 return C_MASK;
3038 }
3039
3040 if (hg > 0 && mydisk <= D_INCONSISTENT) {
3041 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
3042 return C_MASK;
3043 }
3044
3045 if (hg < 0 && /* by intention we do not use mydisk here. */
3046 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003047 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003048 case ASB_CALL_HELPER:
3049 drbd_khelper(mdev, "pri-lost");
3050 /* fall through */
3051 case ASB_DISCONNECT:
3052 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3053 return C_MASK;
3054 case ASB_VIOLENTLY:
3055 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3056 " assumption\n");
3057 }
3058 }
3059
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003060 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003061 if (hg == 0)
3062 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3063 else
3064 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3065 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3066 abs(hg) >= 2 ? "full" : "bit-map based");
3067 return C_MASK;
3068 }
3069
Philipp Reisnerb411b362009-09-25 16:07:19 -07003070 if (abs(hg) >= 2) {
3071 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003072 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3073 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003074 return C_MASK;
3075 }
3076
3077 if (hg > 0) { /* become sync source. */
3078 rv = C_WF_BITMAP_S;
3079 } else if (hg < 0) { /* become sync target */
3080 rv = C_WF_BITMAP_T;
3081 } else {
3082 rv = C_CONNECTED;
3083 if (drbd_bm_total_weight(mdev)) {
3084 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3085 drbd_bm_total_weight(mdev));
3086 }
3087 }
3088
3089 return rv;
3090}
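
/*
 * Illustrative sketch, not part of the driver: how the handshake result hg
 * computed above maps to the next connection state.  The helper name is
 * hypothetical; C_MASK means "drop the connection".
 */
static inline enum drbd_conns example_conn_state_for_hg(int hg, bool unresolved)
{
	if (unresolved)			/* e.g. a split brain nobody resolved */
		return C_MASK;
	if (hg > 0)
		return C_WF_BITMAP_S;	/* exchange bitmaps, become SyncSource */
	if (hg < 0)
		return C_WF_BITMAP_T;	/* exchange bitmaps, become SyncTarget */
	return C_CONNECTED;		/* in sync, no resync necessary */
}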
3091
Philipp Reisnerf179d762011-05-16 17:31:47 +02003092static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093{
3094 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003095 if (peer == ASB_DISCARD_REMOTE)
3096 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003097
3098 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003099 if (peer == ASB_DISCARD_LOCAL)
3100 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101
3102 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003103 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104}
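
/*
 * Illustrative usage, not part of the driver: the peer reports its
 * after-split-brain policies from its own point of view, so a "discard
 * remote" there has to read as "discard local" here before the two
 * configurations can be compared.  This mirrors the checks done in
 * receive_protocol() below; the helper name is hypothetical.
 */
static inline bool example_after_sb_policies_match(enum drbd_after_sb_p mine,
						   enum drbd_after_sb_p peers)
{
	return convert_after_sb(peers) == mine;
}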
3105
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003106static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003108 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003109 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3110 int p_proto, p_discard_my_data, p_two_primaries, cf;
3111 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3112 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003113 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003114 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115
Philipp Reisnerb411b362009-09-25 16:07:19 -07003116 p_proto = be32_to_cpu(p->protocol);
3117 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3118 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3119 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003121 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003122 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003123
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003124 if (tconn->agreed_pro_version >= 87) {
3125 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003126
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003127 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003128 return -EIO;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003129 err = drbd_recv_all(tconn, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003130 if (err)
3131 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003132 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003133 }
3134
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003135 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003136 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003137
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003138 if (cf & CF_DRY_RUN)
3139 set_bit(CONN_DRY_RUN, &tconn->flags);
3140
3141 rcu_read_lock();
3142 nc = rcu_dereference(tconn->net_conf);
3143
3144 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003145 conn_err(tconn, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003146 goto disconnect_rcu_unlock;
3147 }
3148
3149 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003150 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003151 goto disconnect_rcu_unlock;
3152 }
3153
3154 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003155 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003156 goto disconnect_rcu_unlock;
3157 }
3158
3159 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003160 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003161 goto disconnect_rcu_unlock;
3162 }
3163
3164 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003165 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003166 goto disconnect_rcu_unlock;
3167 }
3168
3169 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003170 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003171 goto disconnect_rcu_unlock;
3172 }
3173
3174 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003175 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003176 goto disconnect_rcu_unlock;
3177 }
3178
3179 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003180 }
3181
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003182 if (integrity_alg[0]) {
3183 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003184
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003185 /*
3186 * We can only change the peer data integrity algorithm
3187 * here. Changing our own data integrity algorithm
3188 * requires that we send a P_PROTOCOL_UPDATE packet at
3189 * the same time; otherwise, the peer has no way to
3190 * tell at which packet boundary the algorithm
3191 * should change.
3192 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003194 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3195 if (!peer_integrity_tfm) {
3196 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3197 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198 goto disconnect;
3199 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003200
3201 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3202 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3203 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3204 if (!(int_dig_in && int_dig_vv)) {
3205 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3206 goto disconnect;
3207 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003208 }
3209
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003210 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3211 if (!new_net_conf) {
3212 conn_err(tconn, "Allocation of new net_conf failed\n");
3213 goto disconnect;
3214 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003215
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003216 mutex_lock(&tconn->data.mutex);
3217 mutex_lock(&tconn->conf_update);
3218 old_net_conf = tconn->net_conf;
3219 *new_net_conf = *old_net_conf;
3220
3221 new_net_conf->wire_protocol = p_proto;
3222 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3223 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3224 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3225 new_net_conf->two_primaries = p_two_primaries;
3226
3227 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3228 mutex_unlock(&tconn->conf_update);
3229 mutex_unlock(&tconn->data.mutex);
3230
3231 crypto_free_hash(tconn->peer_integrity_tfm);
3232 kfree(tconn->int_dig_in);
3233 kfree(tconn->int_dig_vv);
3234 tconn->peer_integrity_tfm = peer_integrity_tfm;
3235 tconn->int_dig_in = int_dig_in;
3236 tconn->int_dig_vv = int_dig_vv;
3237
3238 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3239 conn_info(tconn, "peer data-integrity-alg: %s\n",
3240 integrity_alg[0] ? integrity_alg : "(none)");
3241
3242 synchronize_rcu();
3243 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003244 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003245
Philipp Reisner44ed1672011-04-19 17:10:19 +02003246disconnect_rcu_unlock:
3247 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003249 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003250 kfree(int_dig_in);
3251 kfree(int_dig_vv);
Philipp Reisner72046242011-03-15 18:51:47 +01003252 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003253 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003254}
3255
3256/* helper function
3257 * input: alg name, feature name
3258 * return: NULL (alg name was "")
3259 * ERR_PTR(error) if something goes wrong
3260 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303261static
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3263 const char *alg, const char *name)
3264{
3265 struct crypto_hash *tfm;
3266
3267 if (!alg[0])
3268 return NULL;
3269
3270 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3271 if (IS_ERR(tfm)) {
3272 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3273 alg, name, PTR_ERR(tfm));
3274 return tfm;
3275 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003276 return tfm;
3277}
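
/*
 * Illustrative usage, not part of the driver: the three outcomes of the
 * helper above and how a caller is expected to tell them apart.  This
 * mirrors the verify-alg/csums-alg handling in receive_SyncParam() further
 * down; the function name is hypothetical.
 */
static inline int example_alloc_digest(struct drbd_conf *mdev, const char *alg)
{
	struct crypto_hash *tfm;

	tfm = drbd_crypto_alloc_digest_safe(mdev, alg, "verify-alg");
	if (tfm == NULL)
		return 0;		/* alg was "", the feature stays disabled */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* allocation failed, error already logged */
	crypto_free_hash(tfm);		/* real code would keep and use the tfm */
	return 0;
}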
3278
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003279static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003280{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003281 void *buffer = tconn->data.rbuf;
3282 int size = pi->size;
3283
3284 while (size) {
3285 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3286 s = drbd_recv(tconn, buffer, s);
3287 if (s <= 0) {
3288 if (s < 0)
3289 return s;
3290 break;
3291 }
3292 size -= s;
3293 }
3294 if (size)
3295 return -EIO;
3296 return 0;
3297}
3298
3299/*
3300 * config_unknown_volume - device configuration command for unknown volume
3301 *
3302 * When a device is added to an existing connection, the node on which the
3303 * device is added first will send configuration commands to its peer but the
3304 * peer will not know about the device yet. It will warn and ignore these
3305 * commands. Once the device is added on the second node, the second node will
3306 * send the same device configuration commands, but in the other direction.
3307 *
3308 * (We can also end up here if drbd is misconfigured.)
3309 */
3310static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3311{
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003312 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
3313 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003314 return ignore_remaining_packet(tconn, pi);
3315}
3316
3317static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3318{
3319 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003320 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003321 unsigned int header_size, data_size, exp_max_sz;
3322 struct crypto_hash *verify_tfm = NULL;
3323 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003324 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003325 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003326 const int apv = tconn->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003327 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003328 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003329 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003330
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003331 mdev = vnr_to_mdev(tconn, pi->vnr);
3332 if (!mdev)
3333 return config_unknown_volume(tconn, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003334
3335 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3336 : apv == 88 ? sizeof(struct p_rs_param)
3337 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003338 : apv <= 94 ? sizeof(struct p_rs_param_89)
3339 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003340
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003341 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003343 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003344 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003345 }
3346
3347 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003348 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003349 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003350 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003351 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003352 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003353 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003354 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003355 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003356 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003357 D_ASSERT(data_size == 0);
3358 }
3359
3360 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003361 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003362 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3363
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003364 err = drbd_recv_all(mdev->tconn, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003365 if (err)
3366 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003368 mutex_lock(&mdev->tconn->conf_update);
3369 old_net_conf = mdev->tconn->net_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003370 if (get_ldev(mdev)) {
3371 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3372 if (!new_disk_conf) {
3373 put_ldev(mdev);
3374 mutex_unlock(&mdev->tconn->conf_update);
3375 dev_err(DEV, "Allocation of new disk_conf failed\n");
3376 return -ENOMEM;
3377 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003378
Philipp Reisner813472c2011-05-03 16:47:02 +02003379 old_disk_conf = mdev->ldev->disk_conf;
3380 *new_disk_conf = *old_disk_conf;
3381
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003382 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003383 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003384
3385 if (apv >= 88) {
3386 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003387 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3388 dev_err(DEV, "verify-alg of wrong size, "
3389 "peer wants %u, accepting only up to %u bytes\n",
3390 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003391 err = -EIO;
3392 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003393 }
3394
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003395 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003396 if (err)
3397 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003398 /* we expect NUL terminated string */
3399 /* but just in case someone tries to be evil */
3400 D_ASSERT(p->verify_alg[data_size-1] == 0);
3401 p->verify_alg[data_size-1] = 0;
3402
3403 } else /* apv >= 89 */ {
3404 /* we still expect NUL terminated strings */
3405 /* but just in case someone tries to be evil */
3406 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3407 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3408 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3409 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3410 }
3411
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003412 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003413 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3414 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003415 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416 goto disconnect;
3417 }
3418 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3419 p->verify_alg, "verify-alg");
3420 if (IS_ERR(verify_tfm)) {
3421 verify_tfm = NULL;
3422 goto disconnect;
3423 }
3424 }
3425
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003426 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003427 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3428 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003429 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003430 goto disconnect;
3431 }
3432 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3433 p->csums_alg, "csums-alg");
3434 if (IS_ERR(csums_tfm)) {
3435 csums_tfm = NULL;
3436 goto disconnect;
3437 }
3438 }
3439
Philipp Reisner813472c2011-05-03 16:47:02 +02003440 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003441 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3442 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3443 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3444 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003445
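			/* One fifo slot per SLEEP_TIME tick: with SLEEP_TIME
			 * being HZ/10 (100ms) and c_plan_ahead given in 0.1s
			 * units, this works out to fifo_size == c_plan_ahead,
			 * e.g. a 2 second plan-ahead (20) needs 20 slots. */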
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003446 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner9958c852011-05-03 16:19:31 +02003447 if (fifo_size != mdev->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003448 new_plan = fifo_alloc(fifo_size);
3449 if (!new_plan) {
Philipp Reisner778f2712010-07-06 11:14:00 +02003450				dev_err(DEV, "kmalloc of fifo_buffer failed\n");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003451 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003452 goto disconnect;
3453 }
3454 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003455 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003457 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003458 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3459 if (!new_net_conf) {
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003460 dev_err(DEV, "Allocation of new net_conf failed\n");
3461 goto disconnect;
3462 }
3463
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003464 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003465
3466 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003467 strcpy(new_net_conf->verify_alg, p->verify_alg);
3468 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003469 crypto_free_hash(mdev->tconn->verify_tfm);
3470 mdev->tconn->verify_tfm = verify_tfm;
3471 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3472 }
3473 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003474 strcpy(new_net_conf->csums_alg, p->csums_alg);
3475 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003476 crypto_free_hash(mdev->tconn->csums_tfm);
3477 mdev->tconn->csums_tfm = csums_tfm;
3478 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3479 }
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003480 rcu_assign_pointer(tconn->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003482 }
3483
Philipp Reisner813472c2011-05-03 16:47:02 +02003484 if (new_disk_conf) {
3485 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3486 put_ldev(mdev);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003487 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003488
3489 if (new_plan) {
3490 old_plan = mdev->rs_plan_s;
3491 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
3492 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003493
3494 mutex_unlock(&mdev->tconn->conf_update);
3495 synchronize_rcu();
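	/* synchronize_rcu() only returns once all readers that may still
	 * reference the old net_conf/disk_conf/plan have left their RCU
	 * read-side critical sections, so freeing the old copies is safe. */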
3496 if (new_net_conf)
3497 kfree(old_net_conf);
3498 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003499 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003500
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003501 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003502
Philipp Reisner813472c2011-05-03 16:47:02 +02003503reconnect:
3504 if (new_disk_conf) {
3505 put_ldev(mdev);
3506 kfree(new_disk_conf);
3507 }
3508 mutex_unlock(&mdev->tconn->conf_update);
3509 return -EIO;
3510
Philipp Reisnerb411b362009-09-25 16:07:19 -07003511disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003512 kfree(new_plan);
3513 if (new_disk_conf) {
3514 put_ldev(mdev);
3515 kfree(new_disk_conf);
3516 }
Philipp Reisnera0095502011-05-03 13:14:15 +02003517 mutex_unlock(&mdev->tconn->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003518 /* just for completeness: actually not needed,
3519 * as this is not reached if csums_tfm was ok. */
3520 crypto_free_hash(csums_tfm);
3521 /* but free the verify_tfm again, if csums_tfm did not work out */
3522 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003523 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003524 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525}
3526
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527/* warn if the arguments differ by more than 12.5% */
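/* "considerably" means the delta exceeds one eighth (a>>3, i.e. 12.5%)
 * of either value; both arguments are sector counts. */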
3528static void warn_if_differ_considerably(struct drbd_conf *mdev,
3529 const char *s, sector_t a, sector_t b)
3530{
3531 sector_t d;
3532 if (a == 0 || b == 0)
3533 return;
3534 d = (a > b) ? (a - b) : (b - a);
3535 if (d > (a>>3) || d > (b>>3))
3536 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3537 (unsigned long long)a, (unsigned long long)b);
3538}
3539
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003540static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003541{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003542 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003543 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003544 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003545 sector_t p_size, p_usize, my_usize;
3546 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003547 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003549 mdev = vnr_to_mdev(tconn, pi->vnr);
3550 if (!mdev)
3551 return config_unknown_volume(tconn, pi);
3552
Philipp Reisnerb411b362009-09-25 16:07:19 -07003553 p_size = be64_to_cpu(p->d_size);
3554 p_usize = be64_to_cpu(p->u_size);
3555
Philipp Reisnerb411b362009-09-25 16:07:19 -07003556 /* just store the peer's disk size for now.
3557 * we still need to figure out whether we accept that. */
3558 mdev->p_size = p_size;
3559
Philipp Reisnerb411b362009-09-25 16:07:19 -07003560 if (get_ldev(mdev)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003561 rcu_read_lock();
3562 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3563 rcu_read_unlock();
3564
Philipp Reisnerb411b362009-09-25 16:07:19 -07003565 warn_if_differ_considerably(mdev, "lower level device sizes",
3566 p_size, drbd_get_max_capacity(mdev->ldev));
3567 warn_if_differ_considerably(mdev, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003568 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003569
3570 /* if this is the first connect, or an otherwise expected
3571 * param exchange, choose the minimum */
3572 if (mdev->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003573 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003574
3575 /* Never shrink a device with usable data during connect.
3576 But allow online shrinking if we are connected. */
Philipp Reisneref5e44a2011-05-03 13:27:43 +02003577 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003578 drbd_get_capacity(mdev->this_bdev) &&
3579 mdev->state.disk >= D_OUTDATED &&
3580 mdev->state.conn < C_CONNECTED) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003581 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003582 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003583 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003584 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003585 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003586
3587 if (my_usize != p_usize) {
3588 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3589
3590 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3591 if (!new_disk_conf) {
3592 dev_err(DEV, "Allocation of new disk_conf failed\n");
3593 put_ldev(mdev);
3594 return -ENOMEM;
3595 }
3596
3597 mutex_lock(&mdev->tconn->conf_update);
3598 old_disk_conf = mdev->ldev->disk_conf;
3599 *new_disk_conf = *old_disk_conf;
3600 new_disk_conf->disk_size = p_usize;
3601
3602 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3603 mutex_unlock(&mdev->tconn->conf_update);
3604 synchronize_rcu();
3605 kfree(old_disk_conf);
3606
 3607			dev_info(DEV, "Peer sets u_size to %lu sectors (old: %lu)\n",
 3608				 (unsigned long)p_usize, (unsigned long)my_usize);
3609 }
3610
Philipp Reisnerb411b362009-09-25 16:07:19 -07003611 put_ldev(mdev);
3612 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003613
Philipp Reisnere89b5912010-03-24 17:11:33 +01003614 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003615 if (get_ldev(mdev)) {
Philipp Reisnerd752b262013-06-25 16:50:08 +02003616 dd = drbd_determine_dev_size(mdev, ddsf, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003617 put_ldev(mdev);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003618 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003619 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620 drbd_md_sync(mdev);
3621 } else {
3622 /* I am diskless, need to accept the peer's size. */
3623 drbd_set_my_capacity(mdev, p_size);
3624 }
3625
Philipp Reisner99432fc2011-05-20 16:39:13 +02003626 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3627 drbd_reconsider_max_bio_size(mdev);
3628
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629 if (get_ldev(mdev)) {
3630 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3631 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3632 ldsc = 1;
3633 }
3634
Philipp Reisnerb411b362009-09-25 16:07:19 -07003635 put_ldev(mdev);
3636 }
3637
3638 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3639 if (be64_to_cpu(p->c_size) !=
3640 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3641 /* we have different sizes, probably peer
3642 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003643 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644 }
3645 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
Philipp Reisnere96c9632013-06-25 16:50:07 +02003646 (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003648 mdev->state.disk >= D_INCONSISTENT) {
3649 if (ddsf & DDSF_NO_RESYNC)
3650 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3651 else
3652 resync_after_online_grow(mdev);
3653 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3655 }
3656 }
3657
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003658 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003659}
3660
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003661static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003662{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003663 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003664 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003665 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003666 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003668 mdev = vnr_to_mdev(tconn, pi->vnr);
3669 if (!mdev)
3670 return config_unknown_volume(tconn, pi);
3671
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003673 if (!p_uuid) {
3674 dev_err(DEV, "kmalloc of p_uuid failed\n");
 3675		return -ENOMEM;
3676 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003677
3678 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3679 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3680
3681 kfree(mdev->p_uuid);
3682 mdev->p_uuid = p_uuid;
3683
3684 if (mdev->state.conn < C_CONNECTED &&
3685 mdev->state.disk < D_INCONSISTENT &&
3686 mdev->state.role == R_PRIMARY &&
3687 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3688 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3689 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003690 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003691 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003692 }
3693
3694 if (get_ldev(mdev)) {
3695 int skip_initial_sync =
3696 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003697 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3699 (p_uuid[UI_FLAGS] & 8);
3700 if (skip_initial_sync) {
3701 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3702 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003703 "clear_n_write from receive_uuids",
3704 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3706 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3707 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3708 CS_VERBOSE, NULL);
3709 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003710 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003711 }
3712 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003713 } else if (mdev->state.disk < D_INCONSISTENT &&
3714 mdev->state.role == R_PRIMARY) {
3715 /* I am a diskless primary, the peer just created a new current UUID
3716 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003717 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003718 }
3719
 3720	/* Before we test for the disk state, we should wait until a possibly
 3721	   ongoing cluster wide state change has finished. That is important if
3722 we are primary and are detaching from our disk. We need to see the
3723 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003724 mutex_lock(mdev->state_mutex);
3725 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003727 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3728
3729 if (updated_uuids)
3730 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003732 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733}
3734
3735/**
3736 * convert_state() - Converts the peer's view of the cluster state to our point of view
3737 * @ps: The state as seen by the peer.
3738 */
3739static union drbd_state convert_state(union drbd_state ps)
3740{
3741 union drbd_state ms;
3742
3743 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003744 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003745 [C_CONNECTED] = C_CONNECTED,
3746
3747 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3748 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3749 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3750 [C_VERIFY_S] = C_VERIFY_T,
3751 [C_MASK] = C_MASK,
3752 };
3753
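	/* Mirror the rest: the peer's "role" is our "peer", its "disk" is our
	 * "pdsk", and asymmetric connection states (StartingSyncS/T,
	 * VerifyS/T, ...) are translated through c_tab above. */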
3754 ms.i = ps.i;
3755
3756 ms.conn = c_tab[ps.conn];
3757 ms.peer = ps.role;
3758 ms.role = ps.peer;
3759 ms.pdsk = ps.disk;
3760 ms.disk = ps.pdsk;
3761 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3762
3763 return ms;
3764}
3765
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003766static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003767{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003768 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003769 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003771 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003773 mdev = vnr_to_mdev(tconn, pi->vnr);
3774 if (!mdev)
3775 return -EIO;
3776
Philipp Reisnerb411b362009-09-25 16:07:19 -07003777 mask.i = be32_to_cpu(p->mask);
3778 val.i = be32_to_cpu(p->val);
3779
Lars Ellenberg427c0432012-08-01 12:43:01 +02003780 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003781 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003782 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003783 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784 }
3785
3786 mask = convert_state(mask);
3787 val = convert_state(val);
3788
3789 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003790 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791
Philipp Reisnerb411b362009-09-25 16:07:19 -07003792 drbd_md_sync(mdev);
3793
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003794 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795}
3796
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003797static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003799 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003800 union drbd_state mask, val;
3801 enum drbd_state_rv rv;
3802
3803 mask.i = be32_to_cpu(p->mask);
3804 val.i = be32_to_cpu(p->val);
3805
Lars Ellenberg427c0432012-08-01 12:43:01 +02003806 if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003807 mutex_is_locked(&tconn->cstate_mutex)) {
3808 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003809 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003810 }
3811
3812 mask = convert_state(mask);
3813 val = convert_state(val);
3814
Philipp Reisner778bcf22011-03-28 12:55:03 +02003815 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003816 conn_send_sr_reply(tconn, rv);
3817
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003818 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003819}
3820
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003821static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003823 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003824 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003825 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003826 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003827 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003828 int rv;
3829
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003830 mdev = vnr_to_mdev(tconn, pi->vnr);
3831 if (!mdev)
3832 return config_unknown_volume(tconn, pi);
3833
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834 peer_state.i = be32_to_cpu(p->state);
3835
3836 real_peer_disk = peer_state.disk;
3837 if (peer_state.disk == D_NEGOTIATING) {
3838 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3839 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3840 }
3841
Philipp Reisner87eeee42011-01-19 14:16:30 +01003842 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003843 retry:
Philipp Reisner78bae592011-03-28 15:40:12 +02003844 os = ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003845 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846
Lars Ellenberg545752d2011-12-05 14:39:25 +01003847 /* If some other part of the code (asender thread, timeout)
3848 * already decided to close the connection again,
3849 * we must not "re-establish" it here. */
3850 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003851 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003852
Lars Ellenberg40424e42011-09-26 15:24:56 +02003853 /* If this is the "end of sync" confirmation, usually the peer disk
 3854	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For an empty (0 bits
3855 * set) resync started in PausedSyncT, or if the timing of pause-/
3856 * unpause-sync events has been "just right", the peer disk may
3857 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3858 */
3859 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3860 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003861 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3862 /* If we are (becoming) SyncSource, but peer is still in sync
3863 * preparation, ignore its uptodate-ness to avoid flapping, it
3864 * will change to inconsistent once the peer reaches active
3865 * syncing states.
3866 * It may have changed syncer-paused flags, however, so we
3867 * cannot ignore this completely. */
3868 if (peer_state.conn > C_CONNECTED &&
3869 peer_state.conn < C_SYNC_SOURCE)
3870 real_peer_disk = D_INCONSISTENT;
3871
3872 /* if peer_state changes to connected at the same time,
3873 * it explicitly notifies us that it finished resync.
3874 * Maybe we should finish it up, too? */
3875 else if (os.conn >= C_SYNC_SOURCE &&
3876 peer_state.conn == C_CONNECTED) {
3877 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3878 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003879 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003880 }
3881 }
3882
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003883 /* explicit verify finished notification, stop sector reached. */
3884 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3885 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003886 ov_out_of_sync_print(mdev);
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003887 drbd_resync_finished(mdev);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003888 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003889 }
3890
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003891 /* peer says his disk is inconsistent, while we think it is uptodate,
3892 * and this happens while the peer still thinks we have a sync going on,
3893 * but we think we are already done with the sync.
3894 * We ignore this to avoid flapping pdsk.
3895 * This should not happen, if the peer is a recent version of drbd. */
3896 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3897 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3898 real_peer_disk = D_UP_TO_DATE;
3899
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003900 if (ns.conn == C_WF_REPORT_PARAMS)
3901 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003902
Philipp Reisner67531712010-10-27 12:21:30 +02003903 if (peer_state.conn == C_AHEAD)
3904 ns.conn = C_BEHIND;
3905
Philipp Reisnerb411b362009-09-25 16:07:19 -07003906 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3907 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3908 int cr; /* consider resync */
3909
3910 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003911 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003912 /* if we had an established connection
3913 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003914 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003916 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003917 /* if we have both been inconsistent, and the peer has been
3918 * forced to be UpToDate with --overwrite-data */
3919 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3920 /* if we had been plain connected, and the admin requested to
3921 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003922 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003923 (peer_state.conn >= C_STARTING_SYNC_S &&
3924 peer_state.conn <= C_WF_BITMAP_T));
3925
3926 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003927 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003928
3929 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003930 if (ns.conn == C_MASK) {
3931 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003933 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003934 } else if (peer_state.disk == D_NEGOTIATING) {
3935 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3936 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003937 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003939 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003940 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003941 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003942 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003943 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003944 }
3945 }
3946 }
3947
Philipp Reisner87eeee42011-01-19 14:16:30 +01003948 spin_lock_irq(&mdev->tconn->req_lock);
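	/* The state was sampled and evaluated above without holding req_lock;
	 * if it changed in the meantime, redo the evaluation with a fresh
	 * snapshot. */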
Philipp Reisner78bae592011-03-28 15:40:12 +02003949 if (os.i != drbd_read_state(mdev).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003950 goto retry;
3951 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003952 ns.peer = peer_state.role;
3953 ns.pdsk = real_peer_disk;
3954 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003955 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003956 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003957 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Philipp Reisner2aebfab2011-03-28 16:48:11 +02003958 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003959 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003960 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003961		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003962 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003963 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003964 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003965 drbd_uuid_new_current(mdev);
3966 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003967 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003968 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003969 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003970 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisner78bae592011-03-28 15:40:12 +02003971 ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003972 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003973
3974 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003975 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003976 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977 }
3978
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003979 if (os.conn > C_WF_REPORT_PARAMS) {
3980 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981 peer_state.disk != D_NEGOTIATING ) {
3982 /* we want resync, peer has not yet decided to sync... */
3983 /* Nowadays only used when forcing a node into primary role and
3984 setting its disk to UpToDate with that */
3985 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02003986 drbd_send_current_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003987 }
3988 }
3989
Philipp Reisner08b165b2011-09-05 16:22:33 +02003990 clear_bit(DISCARD_MY_DATA, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003991
Lars Ellenbergcccac982013-03-19 18:16:46 +01003992 drbd_md_sync(mdev); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003994 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995}
3996
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003997static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003998{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003999 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004000 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004001
4002 mdev = vnr_to_mdev(tconn, pi->vnr);
4003 if (!mdev)
4004 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005
4006 wait_event(mdev->misc_wait,
4007 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004008 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 mdev->state.conn < C_CONNECTED ||
4010 mdev->state.disk < D_NEGOTIATING);
4011
4012 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
4013
Philipp Reisnerb411b362009-09-25 16:07:19 -07004014 /* Here the _drbd_uuid_ functions are right, current should
4015 _not_ be rotated into the history */
4016 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
4017 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
4018 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
4019
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004020 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004021 drbd_start_resync(mdev, C_SYNC_TARGET);
4022
4023 put_ldev(mdev);
4024 } else
4025 dev_err(DEV, "Ignoring SyncUUID packet!\n");
4026
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004027 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004028}
4029
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004030/**
4031 * receive_bitmap_plain
4032 *
4033 * Return 0 when done, 1 when another iteration is needed, and a negative error
4034 * code upon failure.
4035 */
4036static int
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004037receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004038 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004039{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004040 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
4041 drbd_header_size(mdev->tconn);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004042 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004043 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004044 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004045 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046
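	/* A plain bitmap packet carries raw little-endian bitmap words; each
	 * packet holds at most one socket buffer minus header, and "want" is
	 * the number of bytes expected for the current chunk. */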
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004047 if (want != size) {
4048 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004049 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050 }
4051 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004052 return 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004053 err = drbd_recv_all(mdev->tconn, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004054 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004055 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004056
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004057 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004058
4059 c->word_offset += num_words;
4060 c->bit_offset = c->word_offset * BITS_PER_LONG;
4061 if (c->bit_offset > c->bm_bits)
4062 c->bit_offset = c->bm_bits;
4063
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004064 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065}
4066
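/* Layout of p_compressed_bm->encoding, as decoded by the helpers below:
 * bits 0-3: the bitmap encoding (enum drbd_bitmap_code),
 * bits 4-6: number of pad bits at the end of the bit stream,
 * bit    7: value of the first run (set or cleared bits). */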
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004067static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4068{
4069 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4070}
4071
4072static int dcbp_get_start(struct p_compressed_bm *p)
4073{
4074 return (p->encoding & 0x80) != 0;
4075}
4076
4077static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4078{
4079 return (p->encoding >> 4) & 0x7;
4080}
4081
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004082/**
4083 * recv_bm_rle_bits
4084 *
4085 * Return 0 when done, 1 when another iteration is needed, and a negative error
4086 * code upon failure.
4087 */
4088static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089recv_bm_rle_bits(struct drbd_conf *mdev,
4090 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004091 struct bm_xfer_ctx *c,
4092 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093{
4094 struct bitstream bs;
4095 u64 look_ahead;
4096 u64 rl;
4097 u64 tmp;
4098 unsigned long s = c->bit_offset;
4099 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004100 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101 int have;
4102 int bits;
4103
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004104 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004105
4106 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4107 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004108 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004109
4110 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4111 bits = vli_decode_bits(&rl, look_ahead);
4112 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004113 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114
4115 if (toggle) {
4116 e = s + rl -1;
4117 if (e >= c->bm_bits) {
4118 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004119 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120 }
4121 _drbd_bm_set_bits(mdev, s, e);
4122 }
4123
4124 if (have < bits) {
4125 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4126 have, bits, look_ahead,
4127 (unsigned int)(bs.cur.b - p->code),
4128 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004129 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004130 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004131 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4132 if (likely(bits < 64))
4133 look_ahead >>= bits;
4134 else
4135 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004136 have -= bits;
4137
4138 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4139 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004140 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004141 look_ahead |= tmp << have;
4142 have += bits;
4143 }
4144
4145 c->bit_offset = s;
4146 bm_xfer_ctx_bit_to_word_offset(c);
4147
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004148 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004149}
4150
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004151/**
4152 * decode_bitmap_c
4153 *
4154 * Return 0 when done, 1 when another iteration is needed, and a negative error
4155 * code upon failure.
4156 */
4157static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158decode_bitmap_c(struct drbd_conf *mdev,
4159 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004160 struct bm_xfer_ctx *c,
4161 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004163 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004164 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165
4166 /* other variants had been implemented for evaluation,
4167 * but have been dropped as this one turned out to be "best"
4168 * during all our tests. */
4169
4170 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01004171 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004172 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173}
4174
4175void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4176 const char *direction, struct bm_xfer_ctx *c)
4177{
4178 /* what would it take to transfer it "plaintext" */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004179 unsigned int header_size = drbd_header_size(mdev->tconn);
4180 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4181 unsigned int plain =
4182 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4183 c->bm_words * sizeof(unsigned long);
4184 unsigned int total = c->bytes[0] + c->bytes[1];
4185 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004186
4187 /* total can not be zero. but just in case: */
4188 if (total == 0)
4189 return;
4190
4191 /* don't report if not compressed */
4192 if (total >= plain)
4193 return;
4194
4195 /* total < plain. check for overflow, still */
4196 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4197 : (1000 * total / plain);
4198
4199 if (r > 1000)
4200 r = 1000;
4201
4202 r = 1000 - r;
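	/* r is now the compression saving in per mille, e.g. plain 1000 bytes
	 * sent as 250 bytes gives r = 750, printed as "compression: 75.0%". */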
4203 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4204 "total %u; compression: %u.%u%%\n",
4205 direction,
4206 c->bytes[1], c->packets[1],
4207 c->bytes[0], c->packets[0],
4208 total, r/10, r % 10);
4209}
4210
4211/* Since we are processing the bitfield from lower addresses to higher,
 4212   it does not matter whether we process it in 32 bit chunks or 64 bit
4213 chunks as long as it is little endian. (Understand it as byte stream,
 4214   beginning with the lowest byte...) If we used big endian
4215 we would need to process it from the highest address to the lowest,
4216 in order to be agnostic to the 32 vs 64 bits issue.
4217
4218 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004219static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004220{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004221 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004222 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004223 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004224
4225 mdev = vnr_to_mdev(tconn, pi->vnr);
4226 if (!mdev)
4227 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004229 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
4230 /* you are supposed to send additional out-of-sync information
4231 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004232
Philipp Reisnerb411b362009-09-25 16:07:19 -07004233 c = (struct bm_xfer_ctx) {
4234 .bm_bits = drbd_bm_bits(mdev),
4235 .bm_words = drbd_bm_words(mdev),
4236 };
4237
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004238 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004239 if (pi->cmd == P_BITMAP)
4240 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
4241 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004242 /* MAYBE: sanity check that we speak proto >= 90,
4243 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004244 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004245
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004246 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004247 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004248 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249 goto out;
4250 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004251 if (pi->size <= sizeof(*p)) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004252 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004253 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004254 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004255 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004256 err = drbd_recv_all(mdev->tconn, p, pi->size);
4257 if (err)
4258 goto out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004259 err = decode_bitmap_c(mdev, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004261 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004262 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004263 goto out;
4264 }
4265
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004266 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004267 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004268
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004269 if (err <= 0) {
4270 if (err < 0)
4271 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004273 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004274 err = drbd_recv_header(mdev->tconn, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004275 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004276 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004277 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278
4279 INFO_bm_xfer_stats(mdev, "receive", &c);
4280
4281 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004282 enum drbd_state_rv rv;
4283
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004284 err = drbd_send_bitmap(mdev);
4285 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004286 goto out;
4287 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004288 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
4289 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004290 } else if (mdev->state.conn != C_WF_BITMAP_S) {
4291 /* admin may have requested C_DISCONNECTING,
4292 * other threads may have noticed network errors */
4293 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
4294 drbd_conn_str(mdev->state.conn));
4295 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004296 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297
Philipp Reisnerb411b362009-09-25 16:07:19 -07004298 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004299 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004300 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301 drbd_start_resync(mdev, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004302 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303}
4304
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004305static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004307 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004308 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004309
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004310 return ignore_remaining_packet(tconn, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004311}
4312
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004313static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315 /* Make sure we've acked all the TCP data associated
4316 * with the data requests being unplugged */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004317 drbd_tcp_quickack(tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004318
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004319 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320}
4321
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004322static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004323{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004324 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004325 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004326
4327 mdev = vnr_to_mdev(tconn, pi->vnr);
4328 if (!mdev)
4329 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004330
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004331 switch (mdev->state.conn) {
4332 case C_WF_SYNC_UUID:
4333 case C_WF_BITMAP_T:
4334 case C_BEHIND:
4335 break;
4336 default:
4337 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4338 drbd_conn_str(mdev->state.conn));
4339 }
4340
Philipp Reisner73a01a12010-10-27 14:33:00 +02004341 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4342
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004343 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004344}
4345
Philipp Reisner02918be2010-08-20 14:35:10 +02004346struct data_cmd {
4347 int expect_payload;
4348 size_t pkt_size;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004349 int (*fn)(struct drbd_tconn *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004350};
4351
Philipp Reisner02918be2010-08-20 14:35:10 +02004352static struct data_cmd drbd_cmd_handler[] = {
4353 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4354 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4355 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4356 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004357 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4358 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4359 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004360 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4361 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004362 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4363 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004364 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4365 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4366 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4367 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4368 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4369 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4370 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4371 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4372 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4373 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004374 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004375 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004376 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004377};
4378
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004379static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004381 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004382 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004383 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004384
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004385 while (get_t_state(&tconn->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004386 struct data_cmd *cmd;
4387
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004388 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004389 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004390 goto err_out;
4391
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004392 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004393 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004394 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4395 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004396 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004397 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004398
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004399 shs = cmd->pkt_size;
4400 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004401 conn_err(tconn, "No payload expected %s l:%d\n",
4402 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004403 goto err_out;
4404 }
4405
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004406 if (shs) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004407 err = drbd_recv_all_warn(tconn, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004408 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004409 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004410 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004411 }
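		/* The fixed-size sub-header has been read into pi.data;
		 * pi.size now holds only the remaining variable payload,
		 * which the handler consumes itself. */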
4412
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004413 err = cmd->fn(tconn, &pi);
4414 if (err) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004415 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
4416 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004417 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004420 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004422 err_out:
4423 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004424}
4425
Philipp Reisner0e29d162011-02-18 14:23:11 +01004426void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427{
4428 struct drbd_wq_barrier barr;
4429
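	/* Queue a barrier work item behind everything already on sender_work
	 * and wait for the worker to run it: w_prev_work_done() completes
	 * barr.done, which guarantees all previously queued work has been
	 * processed. */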
4430 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004431 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432 init_completion(&barr.done);
Lars Ellenbergd5b27b02011-11-14 15:42:37 +01004433 drbd_queue_work(&tconn->sender_work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004434 wait_for_completion(&barr.done);
4435}
4436
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004437static void conn_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004438{
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004439 struct drbd_conf *mdev;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004440 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004441 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004442
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004443 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004444 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004445
Lars Ellenberg545752d2011-12-05 14:39:25 +01004446 /* We are about to start the cleanup after connection loss.
4447 * Make sure drbd_make_request knows about that.
4448 * Usually we should be in some network failure state already,
4449 * but just in case we are not, we fix it up here.
4450 */
Philipp Reisnerb8853db2011-12-13 11:09:16 +01004451 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004452
Philipp Reisnerb411b362009-09-25 16:07:19 -07004453 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004454 drbd_thread_stop(&tconn->asender);
4455 drbd_free_sock(tconn);
4456
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004457 rcu_read_lock();
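	/* For each volume: take an extra reference and drop the RCU read lock
	 * so that drbd_disconnected() may sleep; re-acquire the lock before
	 * advancing the iterator. */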
4458 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4459 kref_get(&mdev->kref);
4460 rcu_read_unlock();
4461 drbd_disconnected(mdev);
4462 kref_put(&mdev->kref, &drbd_minor_destroy);
4463 rcu_read_lock();
4464 }
4465 rcu_read_unlock();
4466
Philipp Reisner12038a32011-11-09 19:18:00 +01004467 if (!list_empty(&tconn->current_epoch->list))
4468 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4469 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4470 atomic_set(&tconn->current_epoch->epoch_size, 0);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01004471 tconn->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004472
Philipp Reisner360cc742011-02-08 14:29:53 +01004473 conn_info(tconn, "Connection closed\n");
4474
Philipp Reisnercb703452011-03-24 11:03:07 +01004475 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4476 conn_try_outdate_peer_async(tconn);
4477
Philipp Reisner360cc742011-02-08 14:29:53 +01004478 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004479 oc = tconn->cstate;
4480 if (oc >= C_UNCONNECTED)
Philipp Reisner376694a2011-11-07 10:54:28 +01004481 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004482
Philipp Reisner360cc742011-02-08 14:29:53 +01004483 spin_unlock_irq(&tconn->req_lock);
4484
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004485 if (oc == C_DISCONNECTING)
Lars Ellenbergd9cc6e22011-04-27 10:25:28 +02004486 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004487}
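/*
 * Teardown order above, in short: force C_NETWORK_FAILURE so drbd_make_request
 * stops expecting the peer, stop the asender and close the sockets, run
 * drbd_disconnected() for every volume, sanity-check that no epoch is still
 * pending, possibly fence the peer via conn_try_outdate_peer_async() when we
 * are Primary, and only then step the connection state down to C_UNCONNECTED
 * (or all the way to C_STANDALONE if a disconnect was requested).
 */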
4488
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004489static int drbd_disconnected(struct drbd_conf *mdev)
Philipp Reisner360cc742011-02-08 14:29:53 +01004490{
Philipp Reisner360cc742011-02-08 14:29:53 +01004491 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004492
Philipp Reisner85719572010-07-21 10:20:17 +02004493 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004494 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004495 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4496 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4497 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004498 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499
4500 /* We do not have data structures that would allow us to
4501 * get the rs_pending_cnt down to 0 again.
4502 * * On C_SYNC_TARGET we do not have any data structures describing
4503 * the pending RSDataRequest's we have sent.
4504 * * On C_SYNC_SOURCE there is no data structure that tracks
4505 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4506 * And no, it is not the sum of the reference counts in the
4507 * resync_LRU. The resync_LRU tracks the whole operation including
4508 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4509 * on the fly. */
4510 drbd_rs_cancel_all(mdev);
4511 mdev->rs_total = 0;
4512 mdev->rs_failed = 0;
4513 atomic_set(&mdev->rs_pending_cnt, 0);
4514 wake_up(&mdev->misc_wait);
4515
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004517 resync_timer_fn((unsigned long)mdev);
4518
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4520 * w_make_resync_request etc. which may still be on the worker queue
4521 * to be "canceled" */
4522 drbd_flush_workqueue(mdev);
4523
Andreas Gruenbachera990be42011-04-06 17:56:48 +02004524 drbd_finish_peer_reqs(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004525
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004526	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4527	   might have queued new work items. The flush before drbd_finish_peer_reqs() is
4528	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4529 drbd_flush_workqueue(mdev);
4530
Lars Ellenberg08332d72012-08-17 15:09:13 +02004531 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4532 * again via drbd_try_clear_on_disk_bm(). */
4533 drbd_rs_cancel_all(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004534
4535 kfree(mdev->p_uuid);
4536 mdev->p_uuid = NULL;
4537
Philipp Reisner2aebfab2011-03-28 16:48:11 +02004538 if (!drbd_suspended(mdev))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004539 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004540
4541 drbd_md_sync(mdev);
4542
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004543 /* serialize with bitmap writeout triggered by the state change,
4544 * if any. */
4545 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4546
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547 /* tcp_close and release of sendpage pages can be deferred. I don't
4548 * want to use SO_LINGER, because apparently it can be deferred for
4549 * more than 20 seconds (longest time I checked).
4550 *
4551 * Actually we don't care for exactly when the network stack does its
4552 * put_page(), but release our reference on these pages right here.
4553 */
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02004554 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555 if (i)
4556 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004557 i = atomic_read(&mdev->pp_in_use_by_net);
4558 if (i)
4559 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004560 i = atomic_read(&mdev->pp_in_use);
4561 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004562 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563
4564 D_ASSERT(list_empty(&mdev->read_ee));
4565 D_ASSERT(list_empty(&mdev->active_ee));
4566 D_ASSERT(list_empty(&mdev->sync_ee));
4567 D_ASSERT(list_empty(&mdev->done_ee));
4568
Philipp Reisner360cc742011-02-08 14:29:53 +01004569 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004570}
4571
4572/*
4573 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4574 * we can agree on is stored in agreed_pro_version.
4575 *
4576 * feature flags and the reserved array should be enough room for future
4577 * enhancements of the handshake protocol, and possible plugins...
4578 *
4579 * for now, they are expected to be zero, but ignored.
4580 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004581static int drbd_send_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004583 struct drbd_socket *sock;
4584 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004585
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004586 sock = &tconn->data;
4587 p = conn_prepare_command(tconn, sock);
4588 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004589 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004590 memset(p, 0, sizeof(*p));
4591 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4592 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004593 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004594}
4595
4596/*
4597 * return values:
4598 * 1 yes, we have a valid connection
4599 * 0 oops, did not work out, please try again
4600 * -1 peer talks a different language,
4601 * no point in trying again, please go standalone.
4602 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004603static int drbd_do_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004604{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004605 /* ASSERT current == tconn->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004606 struct p_connection_features *p;
4607 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004608 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004609 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004610
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004611 err = drbd_send_features(tconn);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004612 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613 return 0;
4614
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004615 err = drbd_recv_header(tconn, &pi);
4616 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004617 return 0;
4618
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004619 if (pi.cmd != P_CONNECTION_FEATURES) {
4620 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004621 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622 return -1;
4623 }
4624
Philipp Reisner77351055b2011-02-07 17:24:26 +01004625 if (pi.size != expect) {
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004626 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004627 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628 return -1;
4629 }
4630
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004631 p = pi.data;
4632 err = drbd_recv_all_warn(tconn, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004633 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635
Philipp Reisnerb411b362009-09-25 16:07:19 -07004636 p->protocol_min = be32_to_cpu(p->protocol_min);
4637 p->protocol_max = be32_to_cpu(p->protocol_max);
4638 if (p->protocol_max == 0)
4639 p->protocol_max = p->protocol_min;
4640
4641 if (PRO_VERSION_MAX < p->protocol_min ||
4642 PRO_VERSION_MIN > p->protocol_max)
4643 goto incompat;
4644
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004645 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004646
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004647 conn_info(tconn, "Handshake successful: "
4648 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004649
4650 return 1;
4651
4652 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004653 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654 "I support %d-%d, peer supports %d-%d\n",
4655 PRO_VERSION_MIN, PRO_VERSION_MAX,
4656 p->protocol_min, p->protocol_max);
4657 return -1;
4658}
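/*
 * Version negotiation in drbd_do_features(), by example (the concrete range
 * depends on PRO_VERSION_MIN/MAX of this build): with a local range of, say,
 * 86..101 and a peer advertising 90..96, the ranges overlap and
 * agreed_pro_version becomes min(101, 96) = 96.  A peer advertising only
 * 80..85 fails the overlap check above and we return -1, i.e. go standalone.
 */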
4659
4660#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004661static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662{
Philipp Reisneref57f9e2013-03-27 14:08:44 +01004663	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4664 conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004665 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004666}
4667#else
4668#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004669
4670/* Return value:
4671 1 - auth succeeded,
4672 0 - failed, try again (network error),
4673 -1 - auth failed, don't try again.
4674*/
4675
Philipp Reisner13e60372011-02-08 09:54:40 +01004676static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004677{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004678 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004679 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4680 struct scatterlist sg;
4681 char *response = NULL;
4682 char *right_response = NULL;
4683 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004684 unsigned int key_len;
4685 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686 unsigned int resp_size;
4687 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004688 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004689 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004690 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004691
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004692 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4693
Philipp Reisner44ed1672011-04-19 17:10:19 +02004694 rcu_read_lock();
4695 nc = rcu_dereference(tconn->net_conf);
4696 key_len = strlen(nc->shared_secret);
4697 memcpy(secret, nc->shared_secret, key_len);
4698 rcu_read_unlock();
4699
Philipp Reisner13e60372011-02-08 09:54:40 +01004700 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004701 desc.flags = 0;
4702
Philipp Reisner44ed1672011-04-19 17:10:19 +02004703 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004704 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004705 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004706 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004707 goto fail;
4708 }
4709
4710 get_random_bytes(my_challenge, CHALLENGE_LEN);
4711
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004712 sock = &tconn->data;
4713 if (!conn_prepare_command(tconn, sock)) {
4714 rv = 0;
4715 goto fail;
4716 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004717 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004718 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719 if (!rv)
4720 goto fail;
4721
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004722 err = drbd_recv_header(tconn, &pi);
4723 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004724 rv = 0;
4725 goto fail;
4726 }
4727
Philipp Reisner77351055b2011-02-07 17:24:26 +01004728 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004729 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004730 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004731 rv = 0;
4732 goto fail;
4733 }
4734
Philipp Reisner77351055b2011-02-07 17:24:26 +01004735 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004736		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004737 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004738 goto fail;
4739 }
4740
Philipp Reisner77351055b2011-02-07 17:24:26 +01004741 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004742 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004743 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004744 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004745 goto fail;
4746 }
4747
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004748 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4749 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004750 rv = 0;
4751 goto fail;
4752 }
4753
Philipp Reisner13e60372011-02-08 09:54:40 +01004754 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004755 response = kmalloc(resp_size, GFP_NOIO);
4756 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004757 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004758 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004759 goto fail;
4760 }
4761
4762 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004763 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004764
4765 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4766 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004767 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004768 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004769 goto fail;
4770 }
4771
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004772 if (!conn_prepare_command(tconn, sock)) {
4773 rv = 0;
4774 goto fail;
4775 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004776 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004777 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004778 if (!rv)
4779 goto fail;
4780
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004781 err = drbd_recv_header(tconn, &pi);
4782 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004783 rv = 0;
4784 goto fail;
4785 }
4786
Philipp Reisner77351055b2011-02-07 17:24:26 +01004787 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004788 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004789 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004790 rv = 0;
4791 goto fail;
4792 }
4793
Philipp Reisner77351055b2011-02-07 17:24:26 +01004794 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004795		conn_err(tconn, "AuthResponse payload has unexpected size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004796 rv = 0;
4797 goto fail;
4798 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004799
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004800 err = drbd_recv_all_warn(tconn, response , resp_size);
4801 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004802 rv = 0;
4803 goto fail;
4804 }
4805
4806 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004807 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004808 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004809 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004810 goto fail;
4811 }
4812
4813 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4814
4815 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4816 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004817 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004818 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004819 goto fail;
4820 }
4821
4822 rv = !memcmp(response, right_response, resp_size);
4823
4824 if (rv)
Philipp Reisner44ed1672011-04-19 17:10:19 +02004825		conn_info(tconn, "Peer authenticated using %d bytes of HMAC\n",
4826 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004827 else
4828 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004829
4830 fail:
4831 kfree(peers_ch);
4832 kfree(response);
4833 kfree(right_response);
4834
4835 return rv;
4836}
4837#endif
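/*
 * The cram-hmac exchange above, end to end (both peers run it symmetrically
 * over the data socket):
 *
 *	A -> B	P_AUTH_CHALLENGE carrying CHALLENGE_LEN random bytes
 *	B -> A	P_AUTH_RESPONSE carrying HMAC(shared_secret, A's challenge)
 *	A	recomputes the HMAC locally and memcmp()s it against the
 *		response.
 *
 * An HMAC mismatch (or an oversized challenge) yields -1, i.e. give up;
 * network errors and otherwise malformed replies yield 0, i.e. retry the
 * connection attempt.
 */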
4838
4839int drbdd_init(struct drbd_thread *thi)
4840{
Philipp Reisner392c8802011-02-09 10:33:31 +01004841 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004842 int h;
4843
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004844 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004845
4846 do {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004847 h = conn_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848 if (h == 0) {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004849 conn_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004850 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004851 }
4852 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004853 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004854 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004855 }
4856 } while (h == 0);
4857
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004858 if (h > 0)
4859 drbdd(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004860
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004861 conn_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004863 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004864 return 0;
4865}
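/*
 * Receiver thread lifecycle: retry conn_connect() once per second for as long
 * as it reports a transient failure (0), hand control to drbdd() once a
 * connection is up, and always finish with conn_disconnect().  A -1 from
 * conn_connect() (incompatible peer or failed authentication) makes us drop
 * to C_DISCONNECTING and discard the network configuration instead of
 * retrying.
 */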
4866
4867/* ********* acknowledge sender ******** */
4868
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004869static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004870{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004871 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004872 int retcode = be32_to_cpu(p->retcode);
4873
4874 if (retcode >= SS_SUCCESS) {
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004875 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004876 } else {
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004877 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4878 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4879 drbd_set_st_err_str(retcode), retcode);
4880 }
4881 wake_up(&tconn->ping_wait);
4882
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004883 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004884}
4885
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004886static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004887{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004888 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004889 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004890 int retcode = be32_to_cpu(p->retcode);
4891
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004892 mdev = vnr_to_mdev(tconn, pi->vnr);
4893 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004894 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004895
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004896 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4897 D_ASSERT(tconn->agreed_pro_version < 100);
4898 return got_conn_RqSReply(tconn, pi);
4899 }
4900
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004901 if (retcode >= SS_SUCCESS) {
4902 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4903 } else {
4904 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004905 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004906 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004907 }
4908 wake_up(&mdev->state_wait);
4909
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004910 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004911}
4912
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004913static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004914{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004915 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004916
4917}
4918
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004919static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004920{
4921 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004922 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4923 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4924 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004925
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004926 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004927}
4928
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004929static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004930{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004931 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004932 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004933 sector_t sector = be64_to_cpu(p->sector);
4934 int blksize = be32_to_cpu(p->blksize);
4935
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004936 mdev = vnr_to_mdev(tconn, pi->vnr);
4937 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004938 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004939
Philipp Reisner31890f42011-01-19 14:12:51 +01004940 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004941
4942 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4943
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004944 if (get_ldev(mdev)) {
4945 drbd_rs_complete_io(mdev, sector);
4946 drbd_set_in_sync(mdev, sector, blksize);
4947 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4948 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4949 put_ldev(mdev);
4950 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004951 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004952 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004953
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004954 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004955}
4956
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004957static int
4958validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4959 struct rb_root *root, const char *func,
4960 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004961{
4962 struct drbd_request *req;
4963 struct bio_and_error m;
4964
Philipp Reisner87eeee42011-01-19 14:16:30 +01004965 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004966 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004967 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004968 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004969 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004970 }
4971 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004972 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004973
4974 if (m.bio)
4975 complete_master_bio(mdev, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004976 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004977}
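/*
 * All the ack handlers below funnel into validate_req_change_req_state():
 * look the request up by (block_id, sector) in the given tree while holding
 * req_lock, feed the matching drbd_req_event into __req_mod(), and complete
 * the master bio outside the lock if that transition finished the request.
 * missing_ok covers cases like protocol A, where a P_NEG_ACK may arrive after
 * the request has already been completed and removed.
 */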
4978
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004979static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004980{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004981 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004982 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004983 sector_t sector = be64_to_cpu(p->sector);
4984 int blksize = be32_to_cpu(p->blksize);
4985 enum drbd_req_event what;
4986
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004987 mdev = vnr_to_mdev(tconn, pi->vnr);
4988 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004989 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004990
Philipp Reisnerb411b362009-09-25 16:07:19 -07004991 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4992
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004993 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004994 drbd_set_in_sync(mdev, sector, blksize);
4995 dec_rs_pending(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004996 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004997 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004998 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004999 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005000 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005001 break;
5002 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005003 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005004 break;
5005 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005006 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005007 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005008 case P_SUPERSEDED:
5009 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005010 break;
5011 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005012 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005013 break;
5014 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005015 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005016 }
5017
5018 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005019 &mdev->write_requests, __func__,
5020 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005021}
5022
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005023static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005024{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005025 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005026 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005027 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005028 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005029 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005030
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005031 mdev = vnr_to_mdev(tconn, pi->vnr);
5032 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005033 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005034
5035 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5036
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005037 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005038 dec_rs_pending(mdev);
5039 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005040 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005041 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005042
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005043 err = validate_req_change_req_state(mdev, p->block_id, sector,
5044 &mdev->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005045 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005046 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005047 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5048 The master bio might already be completed, therefore the
5049		   request is no longer in the write_requests tree. */
5050 /* In Protocol B we might already have got a P_RECV_ACK
5051 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005052 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005053 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005054 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005055}
5056
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005057static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005058{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005059 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005060 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005061 sector_t sector = be64_to_cpu(p->sector);
5062
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005063 mdev = vnr_to_mdev(tconn, pi->vnr);
5064 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005065 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005066
Philipp Reisnerb411b362009-09-25 16:07:19 -07005067 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005068
Philipp Reisner380207d2011-11-11 12:31:20 +01005069 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005070 (unsigned long long)sector, be32_to_cpu(p->blksize));
5071
5072 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005073 &mdev->read_requests, __func__,
5074 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005075}
5076
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005077static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005078{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005079 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005080 sector_t sector;
5081 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005082 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005083
5084 mdev = vnr_to_mdev(tconn, pi->vnr);
5085 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005086 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005087
5088 sector = be64_to_cpu(p->sector);
5089 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005090
5091 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5092
5093 dec_rs_pending(mdev);
5094
5095 if (get_ldev_if_state(mdev, D_FAILED)) {
5096 drbd_rs_complete_io(mdev, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005097 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005098 case P_NEG_RS_DREPLY:
5099 drbd_rs_failed_io(mdev, sector, size);
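			/* fall through */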
5100 case P_RS_CANCEL:
5101 break;
5102 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005103 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005104 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005105 put_ldev(mdev);
5106 }
5107
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005108 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005109}
5110
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005111static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005112{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005113 struct p_barrier_ack *p = pi->data;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005114 struct drbd_conf *mdev;
5115 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005116
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005117 tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005118
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005119 rcu_read_lock();
5120 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5121 if (mdev->state.conn == C_AHEAD &&
5122 atomic_read(&mdev->ap_in_flight) == 0 &&
5123 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
5124 mdev->start_resync_timer.expires = jiffies + HZ;
5125 add_timer(&mdev->start_resync_timer);
5126 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005127 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005128 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005129
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005130 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005131}
5132
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005133static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005134{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005135 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005136 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005137 struct drbd_work *w;
5138 sector_t sector;
5139 int size;
5140
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005141 mdev = vnr_to_mdev(tconn, pi->vnr);
5142 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005143 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005144
Philipp Reisnerb411b362009-09-25 16:07:19 -07005145 sector = be64_to_cpu(p->sector);
5146 size = be32_to_cpu(p->blksize);
5147
5148 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5149
5150 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005151 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005152 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005153 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005154
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005155 if (!get_ldev(mdev))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005156 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005157
Philipp Reisnerb411b362009-09-25 16:07:19 -07005158 drbd_rs_complete_io(mdev, sector);
5159 dec_rs_pending(mdev);
5160
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005161 --mdev->ov_left;
5162
5163 /* let's advance progress step marks only for every other megabyte */
5164 if ((mdev->ov_left & 0x200) == 0x200)
5165 drbd_advance_rs_marks(mdev, mdev->ov_left);
5166
5167 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005168 w = kmalloc(sizeof(*w), GFP_NOIO);
5169 if (w) {
5170 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01005171 w->mdev = mdev;
Lars Ellenbergd5b27b02011-11-14 15:42:37 +01005172 drbd_queue_work(&mdev->tconn->sender_work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005173 } else {
5174 dev_err(DEV, "kmalloc(w) failed.");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005175 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005176 drbd_resync_finished(mdev);
5177 }
5178 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005179 put_ldev(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005180 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005181}
5182
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005183static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005184{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005185 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005186}
5187
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005188static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005189{
Philipp Reisner082a3432011-03-15 16:05:42 +01005190 struct drbd_conf *mdev;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005191 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005192
5193 do {
5194 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5195 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005196
5197 rcu_read_lock();
5198 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5199 kref_get(&mdev->kref);
5200 rcu_read_unlock();
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005201 if (drbd_finish_peer_reqs(mdev)) {
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005202 kref_put(&mdev->kref, &drbd_minor_destroy);
5203 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005204 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005205 kref_put(&mdev->kref, &drbd_minor_destroy);
5206 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005207 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005208 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005209
5210 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005211 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner082a3432011-03-15 16:05:42 +01005212 not_empty = !list_empty(&mdev->done_ee);
5213 if (not_empty)
5214 break;
5215 }
5216 spin_unlock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005217 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005218 } while (not_empty);
5219
5220 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005221}
5222
5223struct asender_cmd {
5224 size_t pkt_size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005225 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005226};
5227
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005228static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005229 [P_PING] = { 0, got_Ping },
5230 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005231 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5232 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5233 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005234 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005235 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5236 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005237 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005238 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5239 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5240 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5241 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005242 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005243 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5244 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5245 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005246};
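/*
 * Dispatch is uniform for every table entry: decode_header() fills in struct
 * packet_info, the entry supplies the expected payload size and the handler,
 * and drbd_asender() below receives exactly header_size + pkt_size bytes
 * before invoking the handler.  In sketch form (as done in the loop below):
 *
 *	cmd = &asender_tbl[pi.cmd];
 *	expect = header_size + cmd->pkt_size;
 *	...receive until 'received == expect'...
 *	err = cmd->fn(tconn, &pi);
 */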
Philipp Reisnerb411b362009-09-25 16:07:19 -07005247
5248int drbd_asender(struct drbd_thread *thi)
5249{
Philipp Reisner392c8802011-02-09 10:33:31 +01005250 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005251 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005252 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005253 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005254 void *buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005255 int received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005256 unsigned int header_size = drbd_header_size(tconn);
5257 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005258 bool ping_timeout_active = false;
5259 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005260 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005261 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005262
Philipp Reisner3990e042013-03-27 14:08:48 +01005263 rv = sched_setscheduler(current, SCHED_RR, &param);
5264 if (rv < 0)
5265 conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005266
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005267 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005268 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005269
5270 rcu_read_lock();
5271 nc = rcu_dereference(tconn->net_conf);
5272 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005273 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005274 ping_int = nc->ping_int;
5275 rcu_read_unlock();
5276
Philipp Reisner32862ec2011-02-08 16:41:01 +01005277 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Andreas Gruenbachera17647a2011-04-01 12:49:42 +02005278 if (drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005279 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005280 goto reconnect;
5281 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02005282 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
5283 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005284 }
5285
Philipp Reisner32862ec2011-02-08 16:41:01 +01005286 /* TODO: conditionally cork; it may hurt latency if we cork without
5287 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005288 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005289 drbd_tcp_cork(tconn->meta.socket);
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005290 if (tconn_finish_peer_reqs(tconn)) {
5291 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005292 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005293 }
5294 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005295 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005296 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005297
5298 /* short circuit, recv_msg would return EINTR anyways. */
5299 if (signal_pending(current))
5300 continue;
5301
Philipp Reisner32862ec2011-02-08 16:41:01 +01005302 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
5303 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005304
5305 flush_signals(current);
5306
5307 /* Note:
5308 * -EINTR (on meta) we got a signal
5309 * -EAGAIN (on meta) rcvtimeo expired
5310 * -ECONNRESET other side closed the connection
5311 * -ERESTARTSYS (on data) we got a signal
5312 * rv < 0 other than above: unexpected error!
5313 * rv == expected: full header or command
5314 * rv < expected: "woken" by signal during receive
5315 * rv == 0 : "connection shut down by peer"
5316 */
5317 if (likely(rv > 0)) {
5318 received += rv;
5319 buf += rv;
5320 } else if (rv == 0) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005321 if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
5322 long t;
5323 rcu_read_lock();
5324 t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
5325 rcu_read_unlock();
5326
5327 t = wait_event_timeout(tconn->ping_wait,
5328 tconn->cstate < C_WF_REPORT_PARAMS,
5329 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005330 if (t)
5331 break;
5332 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005333 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005334 goto reconnect;
5335 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005336 /* If the data socket received something meanwhile,
5337 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01005338 if (time_after(tconn->last_received,
5339 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005340 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005341 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005342 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005343 goto reconnect;
5344 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005345 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005346 continue;
5347 } else if (rv == -EINTR) {
5348 continue;
5349 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005350 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005351 goto reconnect;
5352 }
5353
5354 if (received == expect && cmd == NULL) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005355 if (decode_header(tconn, tconn->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005356 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005357 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005358 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005359 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
5360 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005361 goto disconnect;
5362 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005363 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005364 if (pi.size != expect - header_size) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005365 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005366 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005367 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005368 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005369 }
5370 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005371 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005372
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005373 err = cmd->fn(tconn, &pi);
5374 if (err) {
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005375 conn_err(tconn, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005376 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005377 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005378
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005379 tconn->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005380
Philipp Reisner44ed1672011-04-19 17:10:19 +02005381 if (cmd == &asender_tbl[P_PING_ACK]) {
5382 /* restore idle timeout */
5383 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
5384 ping_timeout_active = false;
5385 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005386
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005387 buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005388 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005389 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005390 cmd = NULL;
5391 }
5392 }
5393
5394 if (0) {
5395reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005396 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisner19fffd72012-08-28 16:48:03 +02005397 conn_md_sync(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005398 }
5399 if (0) {
5400disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005401 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005402 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005403 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005404
Philipp Reisner32862ec2011-02-08 16:41:01 +01005405 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005406
5407 return 0;
5408}
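/*
 * Timeout handling in the loop above: the meta socket's rcvtimeo is normally
 * ping_int seconds, so -EAGAIN while idle simply means "time to send a ping".
 * Once a ping is outstanding the timeout is shortened to ping_timeo tenths of
 * a second; if that expires without any traffic having been seen on either
 * socket (tconn->last_received), the peer is considered dead and we
 * reconnect.
 */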