/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);

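/* GFP_TRY: allocate from highmem if possible, and stay quiet if the
 * allocation fails; the callers below retry or fall back on their own. */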
#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
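
/*
 * Typical use: page_chain_del() takes a chain of pages off the global
 * drbd_pp_pool under drbd_pp_lock; page_chain_tail() walks a private chain
 * outside of any lock, so that page_chain_add() only needs the lock for the
 * actual splice back onto the pool.
 */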

static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first unfinished one, we
	   can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retries until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
			      bool retry)
{
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&mdev->pp_in_use) < mxb)
		page = __drbd_alloc_pages(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * It is also used from inside another spin_lock_irq(&mdev->tconn->req_lock).
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

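/* Allocate a peer request (an "EE") from drbd_ee_mempool, plus a page chain
 * large enough for @data_size bytes of payload (no pages if @data_size is 0). */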
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(mdev, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, e_end_resync_block, and e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				    struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

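/* Receive up to @size bytes from @sock into @buf.  Unless the caller passes
 * its own @flags, MSG_WAITALL makes this block until the full amount has arrived. */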
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

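/* Like drbd_recv_short(), but on the connection's data socket: logs resets
 * and shutdowns by the peer, and forces the connection to C_BROKEN_PIPE on a
 * short read. */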
static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(tconn->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			conn_info(tconn, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			conn_err(tconn, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		conn_info(tconn, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv(tconn, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(tconn, buf, size);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &tconn->my_addr, my_addr_len);

	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

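/* State shared between prepare_listen_socket() and drbd_wait_for_connect():
 * the listen socket's sk_state_change callback is redirected so that an
 * incoming connection completes @door_bell and wakes the waiter. */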
struct accept_wait_data {
	struct drbd_tconn *tconn;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);
};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &tconn->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "accept failed, err = %d\n", err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_tconn *, void *, struct packet_info *);

static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(tconn);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(tconn, tconn->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_conf *mdev)
{
	int err;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_current_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	atomic_set(&mdev->ap_in_flight, 0);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_tconn *tconn)
{
	struct drbd_socket sock, msock;
	struct drbd_conf *mdev;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.tconn = tconn,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &tconn->flags);
	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = tconn->data.sbuf;
	sock.rbuf = tconn->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = tconn->meta.sbuf;
	msock.rbuf = tconn->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	tconn->agreed_pro_version = 80;

	if (prepare_listen_socket(tconn, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(tconn);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(tconn, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
				msock.socket = s;
				send_first_packet(tconn, &msock, P_INITIAL_META);
			} else {
				conn_err(tconn, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(tconn->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn, &ad);
		if (s) {
			int fp = receive_first_packet(tconn, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &tconn->flags);
				if (msock.socket) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	tconn->data.socket = sock.socket;
	tconn->meta.socket = msock.socket;
	tconn->last_received = jiffies;

	h = drbd_do_features(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	tconn->data.socket->sk->sk_sndtimeo = timeout;
	tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	set_bit(STATE_SENT, &tconn->flags);

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		kref_get(&mdev->kref);
		rcu_read_unlock();

		/* Prevent a race between resync-handshake and
		 * being promoted to Primary.
		 *
		 * Grab and release the state mutex, so we know that any current
		 * drbd_set_role() is finished, and any incoming drbd_set_role
		 * will see the STATE_SENT flag, and wait for it to be cleared.
		 */
		mutex_lock(mdev->state_mutex);
		mutex_unlock(mdev->state_mutex);

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &mdev->flags);
		else
			clear_bit(DISCARD_MY_DATA, &mdev->flags);

		drbd_connected(mdev);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || tconn->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &tconn->flags);
		return 0;
	}

	drbd_thread_start(&tconn->asender);

	mutex_lock(&tconn->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	tconn->net_conf->discard_my_data = 0;
	mutex_unlock(&tconn->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

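/* Pull vnr/cmd/size out of one of the three on-the-wire header formats
 * (p_header100, p_header95, p_header80), telling them apart by header size
 * and magic.  pi->data is left pointing at the payload behind the header. */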
static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(tconn);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			conn_err(tconn, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 tconn->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	void *buffer = tconn->data.rbuf;
	int err;

	err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
	if (err)
		return err;

	err = decode_header(tconn, buffer, pi);
	tconn->last_received = jiffies;

	return err;
}

Philipp Reisnerb411b362009-09-25 16:07:19 -07001135{
1136 int rv;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001137 struct drbd_conf *mdev;
1138 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001139
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001140 if (tconn->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001141 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001142 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001143 if (!get_ldev(mdev))
1144 continue;
1145 kref_get(&mdev->kref);
1146 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001147
Lars Ellenberg615e0872011-11-17 14:32:12 +01001148 rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
1149 GFP_NOIO, NULL);
1150 if (rv) {
1151 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1152 /* would rather check on EOPNOTSUPP, but that is not reliable.
1153 * don't try again for ANY return value != 0
1154 * if (rv == -EOPNOTSUPP) */
1155 drbd_bump_write_ordering(tconn, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001156 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001157 put_ldev(mdev);
1158 kref_put(&mdev->kref, &drbd_minor_destroy);
1159
1160 rcu_read_lock();
1161 if (rv)
1162 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001163 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001164 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001165 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001166}
1167
1168/**
1169 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
1170 * @tconn: DRBD connection.
1171 * @epoch: Epoch object.
1172 * @ev: Epoch event.
1173 */
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001174static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001175 struct drbd_epoch *epoch,
1176 enum epoch_event ev)
1177{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001178 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001180 enum finish_epoch rv = FE_STILL_LIVE;
1181
Philipp Reisner12038a32011-11-09 19:18:00 +01001182 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183 do {
1184 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185
1186 epoch_size = atomic_read(&epoch->epoch_size);
1187
1188 switch (ev & ~EV_CLEANUP) {
1189 case EV_PUT:
1190 atomic_dec(&epoch->active);
1191 break;
1192 case EV_GOT_BARRIER_NR:
1193 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194 break;
1195 case EV_BECAME_LAST:
1196 /* nothing to do */
1197 break;
1198 }
1199
Philipp Reisnerb411b362009-09-25 16:07:19 -07001200 if (epoch_size != 0 &&
1201 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001202 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001203 if (!(ev & EV_CLEANUP)) {
Philipp Reisner12038a32011-11-09 19:18:00 +01001204 spin_unlock(&tconn->epoch_lock);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001205 drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
Philipp Reisner12038a32011-11-09 19:18:00 +01001206 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001207 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001208#if 0
1209 /* FIXME: dec unacked on connection, once we have
1210 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001211 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001212 dec_unacked(epoch->tconn);
1213#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214
Philipp Reisner12038a32011-11-09 19:18:00 +01001215 if (tconn->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1217 list_del(&epoch->list);
1218 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Philipp Reisner12038a32011-11-09 19:18:00 +01001219 tconn->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001220 kfree(epoch);
1221
1222 if (rv == FE_STILL_LIVE)
1223 rv = FE_DESTROYED;
1224 } else {
1225 epoch->flags = 0;
1226 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001227 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 if (rv == FE_STILL_LIVE)
1229 rv = FE_RECYCLED;
1230 }
1231 }
1232
1233 if (!next_epoch)
1234 break;
1235
1236 epoch = next_epoch;
1237 } while (1);
1238
Philipp Reisner12038a32011-11-09 19:18:00 +01001239 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240
Philipp Reisnerb411b362009-09-25 16:07:19 -07001241 return rv;
1242}
1243
1244/**
1245 * drbd_bump_write_ordering() - Fall back to another write ordering method
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001246 * @tconn: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001247 * @wo: Write ordering method to try.
1248 */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001249void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001250{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001251 struct disk_conf *dc;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001252 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001254 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255 static char *write_ordering_str[] = {
1256 [WO_none] = "none",
1257 [WO_drain_io] = "drain",
1258 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001259 };
1260
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001261 pwo = tconn->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001263 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001264 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner27eb13e2012-03-30 14:12:15 +02001265 if (!get_ldev_if_state(mdev, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001266 continue;
1267 dc = rcu_dereference(mdev->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001268
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001269 if (wo == WO_bdev_flush && !dc->disk_flushes)
1270 wo = WO_drain_io;
1271 if (wo == WO_drain_io && !dc->disk_drain)
1272 wo = WO_none;
1273 put_ldev(mdev);
1274 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001275 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001276 tconn->write_ordering = wo;
1277 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1278 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001279}
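
/*
 * Rough sketch of how the fallback above behaves (illustrative only and
 * compiled out; it ignores the per-volume disk_conf checks and assumes the
 * enum ordering WO_none < WO_drain_io < WO_bdev_flush that min() relies on).
 */
#if 0
static void write_ordering_fallback_sketch(struct drbd_tconn *tconn)
{
	tconn->write_ordering = WO_bdev_flush;		/* strongest method */
	drbd_bump_write_ordering(tconn, WO_drain_io);	/* a failed flush falls back to draining */
	/* Asking for flushes again does not upgrade:
	 * min(WO_drain_io, WO_bdev_flush) == WO_drain_io. */
	drbd_bump_write_ordering(tconn, WO_bdev_flush);	/* still WO_drain_io */
}
#endif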
1280
1281/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001282 * drbd_submit_peer_request() - submit all pages of a peer request as one or more bios
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001283 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001284 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001285 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001286 *
1287 * May spread the pages to multiple bios,
1288 * depending on bio_add_page restrictions.
1289 *
1290 * Returns 0 if all bios have been submitted,
1291 * -ENOMEM if we could not allocate enough bios,
1292 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1293 * single page to an empty bio (which should never happen and likely indicates
1294 * that the lower level IO stack is in some way broken). This has been observed
1295 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001296 */
1297/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001298int drbd_submit_peer_request(struct drbd_conf *mdev,
1299 struct drbd_peer_request *peer_req,
1300 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001301{
1302 struct bio *bios = NULL;
1303 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001304 struct page *page = peer_req->pages;
1305 sector_t sector = peer_req->i.sector;
1306 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001307 unsigned n_bios = 0;
1308 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001309 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001310
1311 /* In most cases, we will only need one bio. But in case the lower
1312 * level restrictions happen to be different at this offset on this
1313 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001314 * request in more than one bio.
1315 *
1316 * Plain bio_alloc is good enough here; this is not a DRBD-internally
1317 * generated bio, but a bio allocated on behalf of the peer.
1318 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001319next_bio:
1320 bio = bio_alloc(GFP_NOIO, nr_pages);
1321 if (!bio) {
1322 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1323 goto fail;
1324 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001325 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001326 bio->bi_iter.bi_sector = sector;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001327 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001328 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001329 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001330 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001331
1332 bio->bi_next = bios;
1333 bios = bio;
1334 ++n_bios;
1335
1336 page_chain_for_each(page) {
1337 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1338 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001339 /* A single page must always be possible!
1340 * But in case it fails anyways,
1341 * we deal with it, and complain (below). */
1342 if (bio->bi_vcnt == 0) {
1343 dev_err(DEV,
1344 "bio_add_page failed for len=%u, "
1345 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001346 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001347 err = -ENOSPC;
1348 goto fail;
1349 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001350 goto next_bio;
1351 }
1352 ds -= len;
1353 sector += len >> 9;
1354 --nr_pages;
1355 }
1356 D_ASSERT(page == NULL);
1357 D_ASSERT(ds == 0);
1358
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001359 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001360 do {
1361 bio = bios;
1362 bios = bios->bi_next;
1363 bio->bi_next = NULL;
1364
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001365 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001366 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001367 return 0;
1368
1369fail:
1370 while (bios) {
1371 bio = bios;
1372 bios = bios->bi_next;
1373 bio_put(bio);
1374 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001375 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001376}
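
/*
 * Worked example for the page math above, assuming 4 KiB pages
 * (PAGE_SHIFT == 12): a 68 KiB peer request has ds == 69632, so
 * nr_pages == (69632 + 4095) >> 12 == 17.  Normally all 17 pages go into a
 * single bio; only if bio_add_page() refuses a page because of queue
 * limits does the next_bio label start another bio for the rest of the
 * page chain.
 */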
1377
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001378static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001379 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001380{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001381 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001382
1383 drbd_remove_interval(&mdev->write_requests, i);
1384 drbd_clear_interval(i);
1385
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001386 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001387 if (i->waiting)
1388 wake_up(&mdev->misc_wait);
1389}
1390
Philipp Reisner77fede52011-11-10 21:19:11 +01001391void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
1392{
1393 struct drbd_conf *mdev;
1394 int vnr;
1395
1396 rcu_read_lock();
1397 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1398 kref_get(&mdev->kref);
1399 rcu_read_unlock();
1400 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1401 kref_put(&mdev->kref, &drbd_minor_destroy);
1402 rcu_read_lock();
1403 }
1404 rcu_read_unlock();
1405}
1406
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001407static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001408{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001409 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001410 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001411 struct drbd_epoch *epoch;
1412
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001413 /* FIXME these are unacked on connection,
1414 * not a specific (peer)device.
1415 */
Philipp Reisner12038a32011-11-09 19:18:00 +01001416 tconn->current_epoch->barrier_nr = p->barrier;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001417 tconn->current_epoch->tconn = tconn;
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001418 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001419
1420 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1421 * the activity log, which means it would not be resynced in case the
1422 * R_PRIMARY crashes now.
1423 * Therefore we must send the barrier_ack after the barrier request was
1424 * completed. */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001425 switch (tconn->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426 case WO_none:
1427 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001428 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001429
1430 /* receiver context, in the writeout path of the other node.
1431 * avoid potential distributed deadlock */
1432 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1433 if (epoch)
1434 break;
1435 else
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001436 conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001437 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001438
1439 case WO_bdev_flush:
1440 case WO_drain_io:
Philipp Reisner77fede52011-11-10 21:19:11 +01001441 conn_wait_active_ee_empty(tconn);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001442 drbd_flush(tconn);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001443
Philipp Reisner12038a32011-11-09 19:18:00 +01001444 if (atomic_read(&tconn->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001445 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1446 if (epoch)
1447 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001448 }
1449
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001450 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001451 default:
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001452 conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001453 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001454 }
1455
1456 epoch->flags = 0;
1457 atomic_set(&epoch->epoch_size, 0);
1458 atomic_set(&epoch->active, 0);
1459
Philipp Reisner12038a32011-11-09 19:18:00 +01001460 spin_lock(&tconn->epoch_lock);
1461 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1462 list_add(&epoch->list, &tconn->current_epoch->list);
1463 tconn->current_epoch = epoch;
1464 tconn->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465 } else {
1466 /* The current_epoch got recycled while we allocated this one... */
1467 kfree(epoch);
1468 }
Philipp Reisner12038a32011-11-09 19:18:00 +01001469 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001470
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001471 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472}
1473
1474/* used from receive_RSDataReply (recv_resync_read)
1475 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001476static struct drbd_peer_request *
1477read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1478 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479{
Lars Ellenberg66660322010-04-06 12:15:04 +02001480 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001481 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001483 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001484 void *dig_in = mdev->tconn->int_dig_in;
1485 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001486 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001488 dgs = 0;
1489 if (mdev->tconn->peer_integrity_tfm) {
1490 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001491 /*
1492 * FIXME: Receive the incoming digest into the receive buffer
1493 * here, together with its struct p_data?
1494 */
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001495 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1496 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001498 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499 }
1500
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001501 if (!expect(IS_ALIGNED(data_size, 512)))
1502 return NULL;
1503 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1504 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001505
Lars Ellenberg66660322010-04-06 12:15:04 +02001506 /* even though we trust our peer,
1507 * we sometimes have to double check. */
1508 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001509 dev_err(DEV, "request from peer beyond end of local disk: "
1510 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001511 (unsigned long long)capacity,
1512 (unsigned long long)sector, data_size);
1513 return NULL;
1514 }
1515
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1517 * "criss-cross" setup, that might cause write-out on some other DRBD,
1518 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02001519 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001520 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001522
Lars Ellenberga73ff322012-06-25 19:15:38 +02001523 if (!data_size)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001524 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001525
Philipp Reisnerb411b362009-09-25 16:07:19 -07001526 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001527 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001528 page_chain_for_each(page) {
1529 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001530 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001531 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001532 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001533 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1534 data[0] = data[0] ^ (unsigned long)-1;
1535 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001537 if (err) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001538 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539 return NULL;
1540 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001541 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 }
1543
1544 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001545 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001547 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1548 (unsigned long long)sector, data_size);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001549 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001550 return NULL;
1551 }
1552 }
1553 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001554 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555}
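
/*
 * Sketch of the stream layout handled above when a data integrity
 * algorithm is configured (assuming crc32c, i.e. a 4-byte digest): the
 * peer sends the digest first, then the payload.  For a 4 KiB write,
 * data_size arrives as 4100; after the 4 digest bytes are received into
 * dig_in, data_size -= dgs leaves the 4096 payload bytes that are read
 * into the page chain and then checked against the locally computed
 * drbd_csum_ee() digest.
 */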
1556
1557/* drbd_drain_block() just takes a data block
1558 * out of the socket input buffer, and discards it.
1559 */
1560static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1561{
1562 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001563 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001564 void *data;
1565
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001566 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001567 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001568
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001569 page = drbd_alloc_pages(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001570
1571 data = kmap(page);
1572 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001573 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1574
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001575 err = drbd_recv_all_warn(mdev->tconn, data, len);
1576 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001578 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579 }
1580 kunmap(page);
Andreas Gruenbacher5cc287e2011-04-07 21:02:59 +02001581 drbd_free_pages(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001582 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001583}
1584
1585static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1586 sector_t sector, int data_size)
1587{
Kent Overstreet79886132013-11-23 17:19:00 -08001588 struct bio_vec bvec;
1589 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001590 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001591 int dgs, err, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001592 void *dig_in = mdev->tconn->int_dig_in;
1593 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001595 dgs = 0;
1596 if (mdev->tconn->peer_integrity_tfm) {
1597 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001598 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1599 if (err)
1600 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001601 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602 }
1603
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604 /* optimistically update recv_cnt. if receiving fails below,
1605 * we disconnect anyway, and counters will be reset. */
1606 mdev->recv_cnt += data_size>>9;
1607
1608 bio = req->master_bio;
Kent Overstreet4f024f32013-10-11 15:44:27 -07001609 D_ASSERT(sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001610
Kent Overstreet79886132013-11-23 17:19:00 -08001611 bio_for_each_segment(bvec, bio, iter) {
1612 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1613 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001614 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001615 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001616 if (err)
1617 return err;
1618 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619 }
1620
1621 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001622 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001623 if (memcmp(dig_in, dig_vv, dgs)) {
1624 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001625 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001626 }
1627 }
1628
1629 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001630 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001631}
1632
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001633/*
1634 * e_end_resync_block() is called in asender context via
1635 * drbd_finish_peer_reqs().
1636 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001637static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001638{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001639 struct drbd_peer_request *peer_req =
1640 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001641 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001642 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001643 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001645 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001647 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1648 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001649 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001650 } else {
1651 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001652 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001653
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001654 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655 }
1656 dec_unacked(mdev);
1657
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001658 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659}
1660
1661static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1662{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001663 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001665 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1666 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001667 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668
1669 dec_rs_pending(mdev);
1670
Philipp Reisnerb411b362009-09-25 16:07:19 -07001671 inc_unacked(mdev);
1672 /* corresponding dec_unacked() in e_end_resync_block()
1673 * or in _drbd_clear_done_ee, respectively */
1674
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001675 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001676
Philipp Reisner87eeee42011-01-19 14:16:30 +01001677 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001678 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001679 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001681 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001682 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001683 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001685 /* don't care for the reason here */
1686 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001687 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001688 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001689 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001690
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001691 drbd_free_peer_req(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001692fail:
1693 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001694 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695}
1696
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001697static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001698find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1699 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700{
1701 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001702
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001703 /* Request object according to our peer */
1704 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001705 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001706 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001707 if (!missing_ok) {
Andreas Gruenbacher5af172e2011-07-15 09:43:23 +02001708 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001709 (unsigned long)id, (unsigned long long)sector);
1710 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001711 return NULL;
1712}
1713
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001714static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001716 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001717 struct drbd_request *req;
1718 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001719 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001720 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001721
1722 mdev = vnr_to_mdev(tconn, pi->vnr);
1723 if (!mdev)
1724 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001725
1726 sector = be64_to_cpu(p->sector);
1727
Philipp Reisner87eeee42011-01-19 14:16:30 +01001728 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001729 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001730 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001731 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001732 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733
Bart Van Assche24c48302011-05-21 18:32:29 +02001734 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001735 * special casing it there for the various failure cases.
1736 * still no race with drbd_fail_pending_reads */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001737 err = recv_dless_read(mdev, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001738 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001739 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 /* else: nothing. handled from drbd_disconnect...
1741 * I don't think we may complete this just yet
1742 * in case we are "on-disconnect: freeze" */
1743
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001744 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001745}
1746
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001747static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001748{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001749 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001750 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001751 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001752 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001753
1754 mdev = vnr_to_mdev(tconn, pi->vnr);
1755 if (!mdev)
1756 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001757
1758 sector = be64_to_cpu(p->sector);
1759 D_ASSERT(p->block_id == ID_SYNCER);
1760
1761 if (get_ldev(mdev)) {
1762 /* data is submitted to disk within recv_resync_read.
1763 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001764 * or in drbd_peer_request_endio. */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001765 err = recv_resync_read(mdev, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766 } else {
1767 if (__ratelimit(&drbd_ratelimit_state))
1768 dev_err(DEV, "Can not write resync data to local disk.\n");
1769
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001770 err = drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001771
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001772 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001773 }
1774
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001775 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001776
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001777 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778}
1779
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001780static void restart_conflicting_writes(struct drbd_conf *mdev,
1781 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001783 struct drbd_interval *i;
1784 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001786 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1787 if (!i->local)
1788 continue;
1789 req = container_of(i, struct drbd_request, i);
1790 if (req->rq_state & RQ_LOCAL_PENDING ||
1791 !(req->rq_state & RQ_POSTPONED))
1792 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001793 /* as it is RQ_POSTPONED, this will cause it to
1794 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001795 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001796 }
1797}
1798
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001799/*
1800 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001801 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001802static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001804 struct drbd_peer_request *peer_req =
1805 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001806 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001807 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001808 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001809
Philipp Reisner303d1442011-04-13 16:24:47 -07001810 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001811 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001812 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1813 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001814 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001816 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001817 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001818 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001819 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001820 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001821 /* we expect it to be marked out of sync anyway...
1822 * maybe assert this? */
1823 }
1824 dec_unacked(mdev);
1825 }
1826 /* we delete from the conflict detection hash _after_ we sent out the
1827 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001828 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001829 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001830 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1831 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001832 if (peer_req->flags & EE_RESTART_REQUESTS)
1833 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001834 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001835 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001836 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001837
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001838 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001839
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001840 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001841}
1842
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001843static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001844{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001845 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001846 struct drbd_peer_request *peer_req =
1847 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001848 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001849
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001850 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851 dec_unacked(mdev);
1852
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001853 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001854}
1855
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001856static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001857{
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001858 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001859}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001860
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001861static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001862{
1863 struct drbd_tconn *tconn = w->mdev->tconn;
1864
1865 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001866 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001867}
1868
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001869static bool seq_greater(u32 a, u32 b)
1870{
1871 /*
1872 * We assume 32-bit wrap-around here.
1873 * For 24-bit wrap-around, we would have to shift:
1874 * a <<= 8; b <<= 8;
1875 */
1876 return (s32)a - (s32)b > 0;
1877}
1878
1879static u32 seq_max(u32 a, u32 b)
1880{
1881 return seq_greater(a, b) ? a : b;
1882}
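
/*
 * Worked example of the wrap-around comparison above (illustrative,
 * compiled out): close to the 32-bit boundary the unsigned difference is
 * reinterpreted as signed, so a sequence number that has just wrapped
 * still counts as newer than one issued shortly before the wrap.
 */
#if 0
static void seq_wrap_example(void)
{
	bool a = seq_greater(5, 0xfffffff0);	/* (s32)(5 - 0xfffffff0) ==  21 -> true  */
	bool b = seq_greater(0xfffffff0, 5);	/* (s32)(0xfffffff0 - 5) == -21 -> false */
	u32  m = seq_max(5, 0xfffffff0);	/* == 5, the logically newer value */
}
#endif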
1883
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001884static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001885{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001886 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001887
Philipp Reisnerb874d232013-10-23 10:59:16 +02001888 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001889 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001890 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1891 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001892 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001893 /* wake up only if we actually changed mdev->peer_seq */
1894 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001895 wake_up(&mdev->seq_wait);
1896 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001897}
1898
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001899static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1900{
1901 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1902}
1903
1904/* maybe change sync_ee into interval trees as well? */
Philipp Reisner3ea35df2012-04-06 12:13:18 +02001905static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001906{
1907 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001908 bool rv = 0;
1909
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001910 spin_lock_irq(&mdev->tconn->req_lock);
1911 list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1912 if (overlaps(peer_req->i.sector, peer_req->i.size,
1913 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001914 rv = 1;
1915 break;
1916 }
1917 }
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001918 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001919
1920 return rv;
1921}
1922
Philipp Reisnerb411b362009-09-25 16:07:19 -07001923/* Called from receive_Data.
1924 * Synchronize packets on sock with packets on msock.
1925 *
1926 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1927 * packet traveling on msock, they are still processed in the order they have
1928 * been sent.
1929 *
1930 * Note: we don't care for Ack packets overtaking P_DATA packets.
1931 *
1932 * In case peer_seq is larger than mdev->peer_seq, there are
1933 * outstanding packets on the msock. We wait for them to arrive.
1934 * In case we are the logically next packet, we update mdev->peer_seq
1935 * ourselves. Correctly handles 32bit wrap around.
1936 *
1937 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1938 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1939 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1940 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1941 *
1942 * returns 0 if we may process the packet,
1943 * -ERESTARTSYS if we were interrupted (by disconnect signal), or -ETIMEDOUT if we gave up waiting. */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001944static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945{
1946 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001947 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001948 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001949
Philipp Reisnerb874d232013-10-23 10:59:16 +02001950 if (!test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001951 return 0;
1952
Philipp Reisnerb411b362009-09-25 16:07:19 -07001953 spin_lock(&mdev->peer_seq_lock);
1954 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001955 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1956 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001957 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001958 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001959
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960 if (signal_pending(current)) {
1961 ret = -ERESTARTSYS;
1962 break;
1963 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001964
1965 rcu_read_lock();
1966 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1967 rcu_read_unlock();
1968
1969 if (!tp)
1970 break;
1971
1972 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001973 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974 spin_unlock(&mdev->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001975 rcu_read_lock();
1976 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1977 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001978 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001979 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001980 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001981 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001982 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001983 break;
1984 }
1985 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001986 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001987 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001988 return ret;
1989}
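
/*
 * Illustrative timeline for the wait above (a sketch, with two_primaries
 * enabled): if mdev->peer_seq is 7, a P_DATA carrying peer_seq == 8 is the
 * logically next packet -- seq_greater(8 - 1, 7) is false -- so it is
 * processed at once and peer_seq becomes 8.  A P_DATA carrying
 * peer_seq == 10 instead means the packets numbered 8 and 9 (typically
 * acks on the msock) have not been processed yet, so the receiver sleeps
 * on seq_wait until peer_seq has advanced far enough, or times out and
 * disconnects.
 */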
1990
Lars Ellenberg688593c2010-11-17 22:25:03 +01001991/* see also bio_flags_to_wire()
1992 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1993 * flags and back. We may replicate to other kernel versions. */
1994static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001995{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001996 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1997 (dpf & DP_FUA ? REQ_FUA : 0) |
1998 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1999 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002000}
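
/*
 * Minimal illustration of the mapping above (compiled out): a peer that
 * issued its bio with REQ_SYNC | REQ_FUA puts DP_RW_SYNC | DP_FUA on the
 * wire via bio_flags_to_wire(), and the receiving side reconstructs the
 * same request flags for its local submission.
 */
#if 0
static unsigned long wire_flags_example(struct drbd_conf *mdev)
{
	/* result carries REQ_SYNC | REQ_FUA on top of the write direction */
	return WRITE | wire_flags_to_bio(mdev, DP_RW_SYNC | DP_FUA);
}
#endif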
2001
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002002static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
2003 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002004{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002005 struct drbd_interval *i;
2006
2007 repeat:
2008 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2009 struct drbd_request *req;
2010 struct bio_and_error m;
2011
2012 if (!i->local)
2013 continue;
2014 req = container_of(i, struct drbd_request, i);
2015 if (!(req->rq_state & RQ_POSTPONED))
2016 continue;
2017 req->rq_state &= ~RQ_POSTPONED;
2018 __req_mod(req, NEG_ACKED, &m);
2019 spin_unlock_irq(&mdev->tconn->req_lock);
2020 if (m.bio)
2021 complete_master_bio(mdev, &m);
2022 spin_lock_irq(&mdev->tconn->req_lock);
2023 goto repeat;
2024 }
2025}
2026
2027static int handle_write_conflicts(struct drbd_conf *mdev,
2028 struct drbd_peer_request *peer_req)
2029{
2030 struct drbd_tconn *tconn = mdev->tconn;
Lars Ellenberg427c0432012-08-01 12:43:01 +02002031 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002032 sector_t sector = peer_req->i.sector;
2033 const unsigned int size = peer_req->i.size;
2034 struct drbd_interval *i;
2035 bool equal;
2036 int err;
2037
2038 /*
2039 * Inserting the peer request into the write_requests tree will prevent
2040 * new conflicting local requests from being added.
2041 */
2042 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2043
2044 repeat:
2045 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2046 if (i == &peer_req->i)
2047 continue;
2048
2049 if (!i->local) {
2050 /*
2051 * Our peer has sent a conflicting remote request; this
2052 * should not happen in a two-node setup. Wait for the
2053 * earlier peer request to complete.
2054 */
2055 err = drbd_wait_misc(mdev, i);
2056 if (err)
2057 goto out;
2058 goto repeat;
2059 }
2060
2061 equal = i->sector == sector && i->size == size;
2062 if (resolve_conflicts) {
2063 /*
2064 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002065 * overlapping request, it can be considered overwritten
2066 * and thus superseded; otherwise, it will be retried
2067 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002068 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002069 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002070 (i->size >> 9) >= sector + (size >> 9);
2071
2072 if (!equal)
2073 dev_alert(DEV, "Concurrent writes detected: "
2074 "local=%llus +%u, remote=%llus +%u, "
2075 "assuming %s came first\n",
2076 (unsigned long long)i->sector, i->size,
2077 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002078 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002079
2080 inc_unacked(mdev);
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002081 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002082 e_send_retry_write;
2083 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2084 wake_asender(mdev->tconn);
2085
2086 err = -ENOENT;
2087 goto out;
2088 } else {
2089 struct drbd_request *req =
2090 container_of(i, struct drbd_request, i);
2091
2092 if (!equal)
2093 dev_alert(DEV, "Concurrent writes detected: "
2094 "local=%llus +%u, remote=%llus +%u\n",
2095 (unsigned long long)i->sector, i->size,
2096 (unsigned long long)sector, size);
2097
2098 if (req->rq_state & RQ_LOCAL_PENDING ||
2099 !(req->rq_state & RQ_POSTPONED)) {
2100 /*
2101 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002102 * decide if this request has been superseded
2103 * or needs to be retried.
2104 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002105 * disappear from the write_requests tree.
2106 *
2107 * In addition, wait for the conflicting
2108 * request to finish locally before submitting
2109 * the conflicting peer request.
2110 */
2111 err = drbd_wait_misc(mdev, &req->i);
2112 if (err) {
2113 _conn_request_state(mdev->tconn,
2114 NS(conn, C_TIMEOUT),
2115 CS_HARD);
2116 fail_postponed_requests(mdev, sector, size);
2117 goto out;
2118 }
2119 goto repeat;
2120 }
2121 /*
2122 * Remember to restart the conflicting requests after
2123 * the new peer request has completed.
2124 */
2125 peer_req->flags |= EE_RESTART_REQUESTS;
2126 }
2127 }
2128 err = 0;
2129
2130 out:
2131 if (err)
2132 drbd_remove_epoch_entry_interval(mdev, peer_req);
2133 return err;
2134}
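
/*
 * Numeric sketch of the "fully contained" test above: with 512-byte
 * sectors, a local request i spanning sectors 100..115 (i->sector == 100,
 * i->size == 8192) fully contains a conflicting peer write of sectors
 * 104..107 (sector == 104, size == 2048), since 100 <= 104 and
 * 100 + 16 >= 104 + 4.  On the node with RESOLVE_CONFLICTS set that peer
 * request is answered with P_SUPERSEDED instead of being submitted; a
 * peer write that is not fully contained is asked to be retried instead.
 */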
2135
Philipp Reisnerb411b362009-09-25 16:07:19 -07002136/* mirrored write */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002137static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002139 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002140 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002141 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002142 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002143 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002144 int rw = WRITE;
2145 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002146 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002147
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002148 mdev = vnr_to_mdev(tconn, pi->vnr);
2149 if (!mdev)
2150 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151
Philipp Reisnerb411b362009-09-25 16:07:19 -07002152 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002153 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002154
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002155 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002156 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisner12038a32011-11-09 19:18:00 +01002157 atomic_inc(&tconn->current_epoch->epoch_size);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002158 err2 = drbd_drain_block(mdev, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002159 if (!err)
2160 err = err2;
2161 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002162 }
2163
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002164 /*
2165 * Corresponding put_ldev done either below (on various errors), or in
2166 * drbd_peer_request_endio, if we successfully submit the data at the
2167 * end of this function.
2168 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002169
2170 sector = be64_to_cpu(p->sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002171 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002172 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002173 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002174 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002175 }
2176
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002177 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002178
Lars Ellenberg688593c2010-11-17 22:25:03 +01002179 dp_flags = be32_to_cpu(p->dp_flags);
2180 rw |= wire_flags_to_bio(mdev, dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002181 if (peer_req->pages == NULL) {
2182 D_ASSERT(peer_req->i.size == 0);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002183 D_ASSERT(dp_flags & DP_FLUSH);
2184 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002185
2186 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002187 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002188
Philipp Reisner12038a32011-11-09 19:18:00 +01002189 spin_lock(&tconn->epoch_lock);
2190 peer_req->epoch = tconn->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002191 atomic_inc(&peer_req->epoch->epoch_size);
2192 atomic_inc(&peer_req->epoch->active);
Philipp Reisner12038a32011-11-09 19:18:00 +01002193 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002194
Philipp Reisner302bdea2011-04-21 11:36:49 +02002195 rcu_read_lock();
2196 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
2197 rcu_read_unlock();
2198 if (tp) {
2199 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002200 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2201 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002203 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002204 err = handle_write_conflicts(mdev, peer_req);
2205 if (err) {
2206 spin_unlock_irq(&mdev->tconn->req_lock);
2207 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002208 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002209 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002210 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002211 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002212 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002213 } else {
2214 update_peer_seq(mdev, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002215 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002216 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002217 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002218 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002219
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002220 if (mdev->state.conn == C_SYNC_TARGET)
Philipp Reisner3ea35df2012-04-06 12:13:18 +02002221 wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002222
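	/* Decide which ack the peer expects for this write.  Peers older than
	 * protocol 100 do not encode that in dp_flags, so derive it from the
	 * configured wire protocol: C wants a write ack once the data is on
	 * stable storage, B only a receive ack. */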
Philipp Reisner303d1442011-04-13 16:24:47 -07002223 if (mdev->tconn->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002224 rcu_read_lock();
2225 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002226 case DRBD_PROT_C:
2227 dp_flags |= DP_SEND_WRITE_ACK;
2228 break;
2229 case DRBD_PROT_B:
2230 dp_flags |= DP_SEND_RECEIVE_ACK;
2231 break;
2232 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002233 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002234 }
2235
2236 if (dp_flags & DP_SEND_WRITE_ACK) {
2237 peer_req->flags |= EE_SEND_WRITE_ACK;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002238 inc_unacked(mdev);
2239 /* corresponding dec_unacked() in e_end_block()
2240 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002241 }
2242
2243 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 /* I really don't like it that the receiver thread
2245 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002246 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002247 }
2248
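	/* The peer has no usable disk (worse than Inconsistent), so this node
	 * holds the only good copy: mark the range out of sync towards the
	 * peer and cover it in the activity log before submitting, so it is
	 * resynced once the peer's disk returns. */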
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002249 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002251 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2252 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2253 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg56392d22013-03-19 18:16:48 +01002254 drbd_al_begin_io(mdev, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002255 }
2256
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002257 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2258 if (!err)
2259 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002260
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002261 /* don't care for the reason here */
2262 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002263 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002264 list_del(&peer_req->w.list);
2265 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002266 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002267 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Lars Ellenberg181286a2011-03-31 15:18:56 +02002268 drbd_al_complete_io(mdev, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002269
Philipp Reisnerb411b362009-09-25 16:07:19 -07002270out_interrupted:
Philipp Reisner1e9dd292011-11-10 15:14:53 +01002271 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002273 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002274 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002275}
2276
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002277/* We may throttle resync, if the lower device seems to be busy,
2278 * and current sync rate is above c_min_rate.
2279 *
2280 * To decide whether or not the lower device is busy, we use a scheme similar
2281 * to MD RAID is_mddev_idle(): if the partition stats reveal a "significant"
2282 * amount (more than 64 sectors) of activity that we cannot account for with
2283 * our own resync activity, the device obviously is "busy".
2284 *
2285 * The current sync rate used here is based only on the most recent two step
2286 * marks, to have a short time average so we can react faster.
2287 */
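/*
 * Roughly: only when the backing device shows more than 64 sectors of
 * activity that our own resync cannot account for do we look at the recent
 * resync rate at all.  That rate is dbdt = Bit2KB(db/dt), with db the bitmap
 * bits cleared since the older of the last two sync marks and dt the seconds
 * elapsed; with the usual 4 KiB per bitmap bit, e.g. 1500 bits cleared within
 * 6 seconds is about 1000 KiB/s.  If dbdt exceeds the configured c_min_rate
 * (KiB/s), the resync is throttled.
 */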
Philipp Reisnere3555d82010-11-07 15:56:29 +01002288int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002289{
2290 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2291 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002292 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002293 int curr_events;
2294 int throttle = 0;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002295 unsigned int c_min_rate;
2296
2297 rcu_read_lock();
2298 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2299 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002300
2301 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002302 if (c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002303 return 0;
2304
Philipp Reisnere3555d82010-11-07 15:56:29 +01002305 spin_lock_irq(&mdev->al_lock);
2306 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2307 if (tmp) {
2308 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2309 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2310 spin_unlock_irq(&mdev->al_lock);
2311 return 0;
2312 }
2313 /* Do not slow down if app IO is already waiting for this extent */
2314 }
2315 spin_unlock_irq(&mdev->al_lock);
2316
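	/* Sectors read plus written on the whole backing disk, minus the
	 * sectors submitted by our own resync (rs_sect_ev): what remains is
	 * IO we did not cause ourselves. */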
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002317 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2318 (int)part_stat_read(&disk->part0, sectors[1]) -
2319 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002320
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002321 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2322 unsigned long rs_left;
2323 int i;
2324
2325 mdev->rs_last_events = curr_events;
2326
2327 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2328 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002329 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2330
2331 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2332 rs_left = mdev->ov_left;
2333 else
2334 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002335
2336 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2337 if (!dt)
2338 dt++;
2339 db = mdev->rs_mark_left[i] - rs_left;
2340 dbdt = Bit2KB(db/dt);
2341
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002342 if (dbdt > c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002343 throttle = 1;
2344 }
2345 return throttle;
2346}
2347
2348
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002349static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002350{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002351 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002352 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002353 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002354 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002355 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002356 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002357 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002358 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002359
2360 mdev = vnr_to_mdev(tconn, pi->vnr);
2361 if (!mdev)
2362 return -EIO;
2363 capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002364
2365 sector = be64_to_cpu(p->sector);
2366 size = be32_to_cpu(p->blksize);
2367
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002368 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002369 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2370 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002371 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002372 }
2373 if (sector + (size>>9) > capacity) {
2374 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2375 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002376 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002377 }
2378
2379 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002380 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002381 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002382 case P_DATA_REQUEST:
2383 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2384 break;
2385 case P_RS_DATA_REQUEST:
2386 case P_CSUM_RS_REQUEST:
2387 case P_OV_REQUEST:
2388 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2389 break;
2390 case P_OV_REPLY:
2391 verb = 0;
2392 dec_rs_pending(mdev);
2393 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2394 break;
2395 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002396 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002397 }
2398 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002399 dev_err(DEV, "Can not satisfy peer's read request, "
2400 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002401
Lars Ellenberga821cc42010-09-06 12:31:37 +02002402 /* drain any remaining payload */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002403 return drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002404 }
2405
2406 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2407 * "criss-cross" setup, that might cause write-out on some other DRBD,
2408 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02002409 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002410 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002411 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002412 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002413 }
2414
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002415 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002416 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002417 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002418 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002419 /* application IO, don't drbd_rs_begin_io */
2420 goto submit;
2421
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002423 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002425 /* used in the sector offset progress display */
2426 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002427 break;
2428
2429 case P_OV_REPLY:
2430 case P_CSUM_RS_REQUEST:
2431 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002432 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002433 if (!di)
2434 goto out_free_e;
2435
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002436 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437 di->digest = (((char *)di)+sizeof(struct digest_info));
2438
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002439 peer_req->digest = di;
2440 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002441
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002442 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002443 goto out_free_e;
2444
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002445 if (pi->cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002446 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002447 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002448 /* used in the sector offset progress display */
2449 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002450 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002451 /* track progress, we may need to throttle */
2452 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002453 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002455 /* drbd_rs_begin_io done when we sent this request,
2456 * but accounting still needs to be done. */
2457 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 }
2459 break;
2460
2461 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002462 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002463 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002464 unsigned long now = jiffies;
2465 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002466 mdev->ov_start_sector = sector;
2467 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002468 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2469 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002470 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2471 mdev->rs_mark_left[i] = mdev->ov_left;
2472 mdev->rs_mark_time[i] = now;
2473 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002474 dev_info(DEV, "Online Verify start sector: %llu\n",
2475 (unsigned long long)sector);
2476 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002477 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002478 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002479 break;
2480
Philipp Reisnerb411b362009-09-25 16:07:19 -07002481 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002482 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002483 }
2484
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002485 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2486 * wrt the receiver, but it is not as straightforward as it may seem.
2487 * Various places in the resync start and stop logic assume resync
2488 * requests are processed in order, requeuing this on the worker thread
2489 * introduces a bunch of new code for synchronization between threads.
2490 *
2491 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2492 * "forever", throttling after drbd_rs_begin_io will lock that extent
2493 * for application writes for the same time. For now, just throttle
2494 * here, where the rest of the code expects the receiver to sleep for
2495 * a while, anyways.
2496 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002497
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002498 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2499 * this defers syncer requests for some time, before letting at least
2500 * one request through. The resync controller on the receiving side
2501 * will adapt to the incoming rate accordingly.
2502 *
2503 * We cannot throttle here if remote is Primary/SyncTarget:
2504 * we would also throttle its application reads.
2505 * In that case, throttling is done on the SyncTarget only.
2506 */
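	/* The throttle below is a flat ~100ms sleep per request rather than an
	 * exact rate limit; the peer's resync controller adapts to the
	 * resulting incoming rate. */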
Philipp Reisnere3555d82010-11-07 15:56:29 +01002507 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2508 schedule_timeout_uninterruptible(HZ/10);
2509 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002510 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002511
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002512submit_for_resync:
2513 atomic_add(size >> 9, &mdev->rs_sect_ev);
2514
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002515submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002516 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002517 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002518 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002519 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002520
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002521 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002522 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002523
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002524 /* don't care for the reason here */
2525 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002526 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002527 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002528 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002529 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2530
Philipp Reisnerb411b362009-09-25 16:07:19 -07002531out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002532 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002533 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002534 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002535}
2536
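/*
 * After-split-brain auto recovery, zero-primaries policy.  Return value
 * convention (shared with the 1p/2p variants below): 1 means this node keeps
 * its data and the peer discards its modifications (we become sync source),
 * -1 means the opposite (we become sync target), and -100 means the
 * configured policy could not reach a decision.
 */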
2537static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2538{
2539 int self, peer, rv = -100;
2540 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002541 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002542
2543 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2544 peer = mdev->p_uuid[UI_BITMAP] & 1;
2545
2546 ch_peer = mdev->p_uuid[UI_SIZE];
2547 ch_self = mdev->comm_bm_set;
2548
Philipp Reisner44ed1672011-04-19 17:10:19 +02002549 rcu_read_lock();
2550 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2551 rcu_read_unlock();
2552 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002553 case ASB_CONSENSUS:
2554 case ASB_DISCARD_SECONDARY:
2555 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002556 case ASB_VIOLENTLY:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002557 dev_err(DEV, "Configuration error.\n");
2558 break;
2559 case ASB_DISCONNECT:
2560 break;
2561 case ASB_DISCARD_YOUNGER_PRI:
2562 if (self == 0 && peer == 1) {
2563 rv = -1;
2564 break;
2565 }
2566 if (self == 1 && peer == 0) {
2567 rv = 1;
2568 break;
2569 }
2570 /* Else fall through to one of the other strategies... */
2571 case ASB_DISCARD_OLDER_PRI:
2572 if (self == 0 && peer == 1) {
2573 rv = 1;
2574 break;
2575 }
2576 if (self == 1 && peer == 0) {
2577 rv = -1;
2578 break;
2579 }
2580 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002581 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002582 "Using discard-least-changes instead\n");
2583 case ASB_DISCARD_ZERO_CHG:
2584 if (ch_peer == 0 && ch_self == 0) {
Lars Ellenberg427c0432012-08-01 12:43:01 +02002585 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002586 ? -1 : 1;
2587 break;
2588 } else {
2589 if (ch_peer == 0) { rv = 1; break; }
2590 if (ch_self == 0) { rv = -1; break; }
2591 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002592 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002593 break;
2594 case ASB_DISCARD_LEAST_CHG:
2595 if (ch_self < ch_peer)
2596 rv = -1;
2597 else if (ch_self > ch_peer)
2598 rv = 1;
2599 else /* ( ch_self == ch_peer ) */
2600 /* Well, then use something else. */
Lars Ellenberg427c0432012-08-01 12:43:01 +02002601 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002602 ? -1 : 1;
2603 break;
2604 case ASB_DISCARD_LOCAL:
2605 rv = -1;
2606 break;
2607 case ASB_DISCARD_REMOTE:
2608 rv = 1;
2609 }
2610
2611 return rv;
2612}
2613
2614static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2615{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002616 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002617 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002618
Philipp Reisner44ed1672011-04-19 17:10:19 +02002619 rcu_read_lock();
2620 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2621 rcu_read_unlock();
2622 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002623 case ASB_DISCARD_YOUNGER_PRI:
2624 case ASB_DISCARD_OLDER_PRI:
2625 case ASB_DISCARD_LEAST_CHG:
2626 case ASB_DISCARD_LOCAL:
2627 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002628 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002629 dev_err(DEV, "Configuration error.\n");
2630 break;
2631 case ASB_DISCONNECT:
2632 break;
2633 case ASB_CONSENSUS:
2634 hg = drbd_asb_recover_0p(mdev);
2635 if (hg == -1 && mdev->state.role == R_SECONDARY)
2636 rv = hg;
2637 if (hg == 1 && mdev->state.role == R_PRIMARY)
2638 rv = hg;
2639 break;
2640 case ASB_VIOLENTLY:
2641 rv = drbd_asb_recover_0p(mdev);
2642 break;
2643 case ASB_DISCARD_SECONDARY:
2644 return mdev->state.role == R_PRIMARY ? 1 : -1;
2645 case ASB_CALL_HELPER:
2646 hg = drbd_asb_recover_0p(mdev);
2647 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002648 enum drbd_state_rv rv2;
2649
Philipp Reisnerb411b362009-09-25 16:07:19 -07002650 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2651 * we might be here in C_WF_REPORT_PARAMS which is transient.
2652 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002653 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2654 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002655 drbd_khelper(mdev, "pri-lost-after-sb");
2656 } else {
2657 dev_warn(DEV, "Successfully gave up primary role.\n");
2658 rv = hg;
2659 }
2660 } else
2661 rv = hg;
2662 }
2663
2664 return rv;
2665}
2666
2667static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2668{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002669 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002670 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671
Philipp Reisner44ed1672011-04-19 17:10:19 +02002672 rcu_read_lock();
2673 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2674 rcu_read_unlock();
2675 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002676 case ASB_DISCARD_YOUNGER_PRI:
2677 case ASB_DISCARD_OLDER_PRI:
2678 case ASB_DISCARD_LEAST_CHG:
2679 case ASB_DISCARD_LOCAL:
2680 case ASB_DISCARD_REMOTE:
2681 case ASB_CONSENSUS:
2682 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002683 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002684 dev_err(DEV, "Configuration error.\n");
2685 break;
2686 case ASB_VIOLENTLY:
2687 rv = drbd_asb_recover_0p(mdev);
2688 break;
2689 case ASB_DISCONNECT:
2690 break;
2691 case ASB_CALL_HELPER:
2692 hg = drbd_asb_recover_0p(mdev);
2693 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002694 enum drbd_state_rv rv2;
2695
Philipp Reisnerb411b362009-09-25 16:07:19 -07002696 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2697 * we might be here in C_WF_REPORT_PARAMS which is transient.
2698 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002699 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2700 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002701 drbd_khelper(mdev, "pri-lost-after-sb");
2702 } else {
2703 dev_warn(DEV, "Successfully gave up primary role.\n");
2704 rv = hg;
2705 }
2706 } else
2707 rv = hg;
2708 }
2709
2710 return rv;
2711}
2712
2713static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2714 u64 bits, u64 flags)
2715{
2716 if (!uuid) {
2717 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2718 return;
2719 }
2720 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2721 text,
2722 (unsigned long long)uuid[UI_CURRENT],
2723 (unsigned long long)uuid[UI_BITMAP],
2724 (unsigned long long)uuid[UI_HISTORY_START],
2725 (unsigned long long)uuid[UI_HISTORY_END],
2726 (unsigned long long)bits,
2727 (unsigned long long)flags);
2728}
2729
2730/*
2731 100 after split brain try auto recover
2732 2 C_SYNC_SOURCE set BitMap
2733 1 C_SYNC_SOURCE use BitMap
2734 0 no Sync
2735 -1 C_SYNC_TARGET use BitMap
2736 -2 C_SYNC_TARGET set BitMap
2737 -100 after split brain, disconnect
2738 -1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002739 -1091 requires proto 91
2740 -1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741 */
2742static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2743{
2744 u64 self, peer;
2745 int i, j;
2746
2747 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2748 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2749
2750 *rule_nr = 10;
2751 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2752 return 0;
2753
2754 *rule_nr = 20;
2755 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2756 peer != UUID_JUST_CREATED)
2757 return -2;
2758
2759 *rule_nr = 30;
2760 if (self != UUID_JUST_CREATED &&
2761 (peer == UUID_JUST_CREATED || peer == (u64)0))
2762 return 2;
2763
2764 if (self == peer) {
2765 int rct, dc; /* roles at crash time */
2766
2767 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2768
Philipp Reisner31890f42011-01-19 14:12:51 +01002769 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002770 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771
2772 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2773 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2774 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002775 drbd_uuid_move_history(mdev);
2776 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
2777 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002778
2779 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2780 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2781 *rule_nr = 34;
2782 } else {
2783 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2784 *rule_nr = 36;
2785 }
2786
2787 return 1;
2788 }
2789
2790 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2791
Philipp Reisner31890f42011-01-19 14:12:51 +01002792 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002793 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002794
2795 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2796 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2797 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2798
2799 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2800 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2801 mdev->p_uuid[UI_BITMAP] = 0UL;
2802
2803 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2804 *rule_nr = 35;
2805 } else {
2806 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2807 *rule_nr = 37;
2808 }
2809
2810 return -1;
2811 }
2812
2813 /* Common power [off|failure] */
2814 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2815 (mdev->p_uuid[UI_FLAGS] & 2);
2816 /* lowest bit is set when we were primary,
2817 * next bit (weight 2) is set when peer was primary */
2818 *rule_nr = 40;
2819
2820 switch (rct) {
2821 case 0: /* !self_pri && !peer_pri */ return 0;
2822 case 1: /* self_pri && !peer_pri */ return 1;
2823 case 2: /* !self_pri && peer_pri */ return -1;
2824 case 3: /* self_pri && peer_pri */
Lars Ellenberg427c0432012-08-01 12:43:01 +02002825 dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002826 return dc ? -1 : 1;
2827 }
2828 }
2829
2830 *rule_nr = 50;
2831 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2832 if (self == peer)
2833 return -1;
2834
2835 *rule_nr = 51;
2836 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2837 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002838 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002839 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2840 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2841 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002842 /* The last P_SYNC_UUID did not get through. Undo the UUID modifications the
2843 peer made when it last started a resync as sync source. */
2844
Philipp Reisner31890f42011-01-19 14:12:51 +01002845 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002846 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002847
2848 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2849 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002850
Lars Ellenberg92b4ca22012-04-30 12:53:52 +02002851 dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
Philipp Reisner4a23f262011-01-11 17:42:17 +01002852 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2853
Philipp Reisnerb411b362009-09-25 16:07:19 -07002854 return -1;
2855 }
2856 }
2857
2858 *rule_nr = 60;
2859 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2860 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2861 peer = mdev->p_uuid[i] & ~((u64)1);
2862 if (self == peer)
2863 return -2;
2864 }
2865
2866 *rule_nr = 70;
2867 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2868 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2869 if (self == peer)
2870 return 1;
2871
2872 *rule_nr = 71;
2873 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2874 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002875 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002876 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2877 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2878 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002879 /* The last P_SYNC_UUID did not get through. Undo the UUID modifications we
2880 made when we last started a resync as sync source. */
2881
Philipp Reisner31890f42011-01-19 14:12:51 +01002882 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002883 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002885 __drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2886 __drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887
Philipp Reisner4a23f262011-01-11 17:42:17 +01002888 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2890 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2891
2892 return 1;
2893 }
2894 }
2895
2896
2897 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002898 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2900 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2901 if (self == peer)
2902 return 2;
2903 }
2904
2905 *rule_nr = 90;
2906 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2907 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2908 if (self == peer && self != ((u64)0))
2909 return 100;
2910
2911 *rule_nr = 100;
2912 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2913 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2914 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2915 peer = mdev->p_uuid[j] & ~((u64)1);
2916 if (self == peer)
2917 return -100;
2918 }
2919 }
2920
2921 return -1000;
2922}
2923
2924/* drbd_sync_handshake() returns the new conn state on success, or
2925 C_MASK on failure.
2926 */
2927static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2928 enum drbd_disk_state peer_disk) __must_hold(local)
2929{
Philipp Reisnerb411b362009-09-25 16:07:19 -07002930 enum drbd_conns rv = C_MASK;
2931 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002932 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002933 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002934
2935 mydisk = mdev->state.disk;
2936 if (mydisk == D_NEGOTIATING)
2937 mydisk = mdev->new_state_tmp.disk;
2938
2939 dev_info(DEV, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002940
2941 spin_lock_irq(&mdev->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002942 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2943 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2944 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2945
2946 hg = drbd_uuid_compare(mdev, &rule_nr);
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002947 spin_unlock_irq(&mdev->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002948
2949 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2950
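	/* hg follows the table above drbd_uuid_compare(): the sign gives the
	 * sync direction (positive: this node becomes sync source), magnitude
	 * 2 means a full sync, +-100 split brain, and values <= -1000 mean
	 * unrelated data or a too old peer protocol. */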
2951 if (hg == -1000) {
2952 dev_alert(DEV, "Unrelated data, aborting!\n");
2953 return C_MASK;
2954 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002955 if (hg < -1000) {
2956 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002957 return C_MASK;
2958 }
2959
2960 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2961 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2962 int f = (hg == -100) || abs(hg) == 2;
2963 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2964 if (f)
2965 hg = hg*2;
2966 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2967 hg > 0 ? "source" : "target");
2968 }
2969
Adam Gandelman3a11a482010-04-08 16:48:23 -07002970 if (abs(hg) == 100)
2971 drbd_khelper(mdev, "initial-split-brain");
2972
Philipp Reisner44ed1672011-04-19 17:10:19 +02002973 rcu_read_lock();
2974 nc = rcu_dereference(mdev->tconn->net_conf);
2975
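	/* Split brain, and automatic recovery is allowed: which of the
	 * after-sb-*pri policies applies depends on how many of the two
	 * nodes are currently in the Primary role (0, 1 or 2). */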
2976 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977 int pcount = (mdev->state.role == R_PRIMARY)
2978 + (peer_role == R_PRIMARY);
2979 int forced = (hg == -100);
2980
2981 switch (pcount) {
2982 case 0:
2983 hg = drbd_asb_recover_0p(mdev);
2984 break;
2985 case 1:
2986 hg = drbd_asb_recover_1p(mdev);
2987 break;
2988 case 2:
2989 hg = drbd_asb_recover_2p(mdev);
2990 break;
2991 }
2992 if (abs(hg) < 100) {
2993 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2994 "automatically solved. Sync from %s node\n",
2995 pcount, (hg < 0) ? "peer" : "this");
2996 if (forced) {
2997 dev_warn(DEV, "Doing a full sync, since"
2998 " UUIDs where ambiguous.\n");
2999 hg = hg*2;
3000 }
3001 }
3002 }
3003
3004 if (hg == -100) {
Philipp Reisner08b165b2011-09-05 16:22:33 +02003005 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003006 hg = -1;
Philipp Reisner08b165b2011-09-05 16:22:33 +02003007 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 hg = 1;
3009
3010 if (abs(hg) < 100)
3011 dev_warn(DEV, "Split-Brain detected, manually solved. "
3012 "Sync from %s node\n",
3013 (hg < 0) ? "peer" : "this");
3014 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003015 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003016 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003017 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003018
3019 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003020 /* FIXME this log message is not correct if we end up here
3021 * after an attempted attach on a diskless node.
3022 * We just refuse to attach -- well, we drop the "connection"
3023 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07003024 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 drbd_khelper(mdev, "split-brain");
3026 return C_MASK;
3027 }
3028
3029 if (hg > 0 && mydisk <= D_INCONSISTENT) {
3030 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
3031 return C_MASK;
3032 }
3033
3034 if (hg < 0 && /* by intention we do not use mydisk here. */
3035 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003036 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003037 case ASB_CALL_HELPER:
3038 drbd_khelper(mdev, "pri-lost");
3039 /* fall through */
3040 case ASB_DISCONNECT:
3041 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3042 return C_MASK;
3043 case ASB_VIOLENTLY:
3044 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3045 "assumption\n");
3046 }
3047 }
3048
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003049 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003050 if (hg == 0)
3051 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3052 else
3053 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3054 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3055 abs(hg) >= 2 ? "full" : "bit-map based");
3056 return C_MASK;
3057 }
3058
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 if (abs(hg) >= 2) {
3060 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003061 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3062 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003063 return C_MASK;
3064 }
3065
3066 if (hg > 0) { /* become sync source. */
3067 rv = C_WF_BITMAP_S;
3068 } else if (hg < 0) { /* become sync target */
3069 rv = C_WF_BITMAP_T;
3070 } else {
3071 rv = C_CONNECTED;
3072 if (drbd_bm_total_weight(mdev)) {
3073 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3074 drbd_bm_total_weight(mdev));
3075 }
3076 }
3077
3078 return rv;
3079}
3080
Philipp Reisnerf179d762011-05-16 17:31:47 +02003081static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003082{
3083 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003084 if (peer == ASB_DISCARD_REMOTE)
3085 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086
3087 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003088 if (peer == ASB_DISCARD_LOCAL)
3089 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003090
3091 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003092 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093}
3094
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003095static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003097 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003098 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3099 int p_proto, p_discard_my_data, p_two_primaries, cf;
3100 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3101 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003102 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003103 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104
Philipp Reisnerb411b362009-09-25 16:07:19 -07003105 p_proto = be32_to_cpu(p->protocol);
3106 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3107 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3108 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003109 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003110 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003111 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003112
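	/* Since protocol 87 the packet carries the peer's data-integrity-alg
	 * as trailing payload of pi->size bytes (a NUL terminated name). */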
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003113 if (tconn->agreed_pro_version >= 87) {
3114 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003115
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003116 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003117 return -EIO;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003118 err = drbd_recv_all(tconn, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003119 if (err)
3120 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003121 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122 }
3123
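	/* For the initial P_PROTOCOL packet every setting has to match our
	 * own net_conf or we disconnect.  A P_PROTOCOL_UPDATE, used when the
	 * peer changes its net options on an established connection, skips
	 * these checks; the received values are applied further below. */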
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003124 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003125 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003126
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003127 if (cf & CF_DRY_RUN)
3128 set_bit(CONN_DRY_RUN, &tconn->flags);
3129
3130 rcu_read_lock();
3131 nc = rcu_dereference(tconn->net_conf);
3132
3133 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003134 conn_err(tconn, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003135 goto disconnect_rcu_unlock;
3136 }
3137
3138 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003139 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003140 goto disconnect_rcu_unlock;
3141 }
3142
3143 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003144 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003145 goto disconnect_rcu_unlock;
3146 }
3147
3148 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003149 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003150 goto disconnect_rcu_unlock;
3151 }
3152
3153 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003154 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003155 goto disconnect_rcu_unlock;
3156 }
3157
3158 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003159 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003160 goto disconnect_rcu_unlock;
3161 }
3162
3163 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003164 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003165 goto disconnect_rcu_unlock;
3166 }
3167
3168 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 }
3170
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003171 if (integrity_alg[0]) {
3172 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003174 /*
3175 * We can only change the peer data integrity algorithm
3176 * here. Changing our own data integrity algorithm
3177 * requires that we send a P_PROTOCOL_UPDATE packet at
3178 * the same time; otherwise, the peer has no way to
3179 * tell between which packets the algorithm should
3180 * change.
3181 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003182
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003183 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3184 if (!peer_integrity_tfm) {
3185 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3186 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003187 goto disconnect;
3188 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003189
3190 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3191 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3192 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3193 if (!(int_dig_in && int_dig_vv)) {
3194 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3195 goto disconnect;
3196 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003197 }
3198
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003199 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3200 if (!new_net_conf) {
3201 conn_err(tconn, "Allocation of new net_conf failed\n");
3202 goto disconnect;
3203 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003204
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003205 mutex_lock(&tconn->data.mutex);
3206 mutex_lock(&tconn->conf_update);
3207 old_net_conf = tconn->net_conf;
3208 *new_net_conf = *old_net_conf;
3209
3210 new_net_conf->wire_protocol = p_proto;
3211 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3212 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3213 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3214 new_net_conf->two_primaries = p_two_primaries;
3215
3216 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3217 mutex_unlock(&tconn->conf_update);
3218 mutex_unlock(&tconn->data.mutex);
3219
3220 crypto_free_hash(tconn->peer_integrity_tfm);
3221 kfree(tconn->int_dig_in);
3222 kfree(tconn->int_dig_vv);
3223 tconn->peer_integrity_tfm = peer_integrity_tfm;
3224 tconn->int_dig_in = int_dig_in;
3225 tconn->int_dig_vv = int_dig_vv;
3226
3227 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3228 conn_info(tconn, "peer data-integrity-alg: %s\n",
3229 integrity_alg[0] ? integrity_alg : "(none)");
3230
3231 synchronize_rcu();
3232 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003233 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234
Philipp Reisner44ed1672011-04-19 17:10:19 +02003235disconnect_rcu_unlock:
3236 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003237disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003238 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003239 kfree(int_dig_in);
3240 kfree(int_dig_vv);
Philipp Reisner72046242011-03-15 18:51:47 +01003241 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003242 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243}
3244
3245/* helper function
3246 * input: alg name, feature name
3247 * return: NULL (alg name was "")
3248 * ERR_PTR(error) if something goes wrong
3249 * or the crypto hash ptr, if it worked out ok. */
3250struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3251 const char *alg, const char *name)
3252{
3253 struct crypto_hash *tfm;
3254
3255 if (!alg[0])
3256 return NULL;
3257
3258 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3259 if (IS_ERR(tfm)) {
3260 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3261 alg, name, PTR_ERR(tfm));
3262 return tfm;
3263 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003264 return tfm;
3265}
3266
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003267static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003268{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003269 void *buffer = tconn->data.rbuf;
3270 int size = pi->size;
3271
3272 while (size) {
3273 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3274 s = drbd_recv(tconn, buffer, s);
3275 if (s <= 0) {
3276 if (s < 0)
3277 return s;
3278 break;
3279 }
3280 size -= s;
3281 }
3282 if (size)
3283 return -EIO;
3284 return 0;
3285}
3286
3287/*
3288 * config_unknown_volume - device configuration command for unknown volume
3289 *
3290 * When a device is added to an existing connection, the node on which the
3291 * device is added first will send configuration commands to its peer but the
3292 * peer will not know about the device yet. It will warn and ignore these
3293 * commands. Once the device is added on the second node, the second node will
3294 * send the same device configuration commands, but in the other direction.
3295 *
3296 * (We can also end up here if drbd is misconfigured.)
3297 */
3298static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3299{
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003300 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
3301 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003302 return ignore_remaining_packet(tconn, pi);
3303}
3304
3305static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3306{
3307 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003308 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 unsigned int header_size, data_size, exp_max_sz;
3310 struct crypto_hash *verify_tfm = NULL;
3311 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003312 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003313 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003314 const int apv = tconn->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003315 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003316 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003317 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003318
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003319 mdev = vnr_to_mdev(tconn, pi->vnr);
3320 if (!mdev)
3321 return config_unknown_volume(tconn, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322
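	/* The SyncParam packet grew over the protocol versions: up to apv 87
	 * it carries only the resync rate, apv 88 appends the verify-alg name
	 * as trailing data, apv 89-94 use p_rs_param_89 with fixed verify-alg
	 * and csums-alg fields, and apv 95 and later use p_rs_param_95, which
	 * adds the dynamic resync controller settings (c_plan_ahead,
	 * c_delay_target, c_fill_target, c_max_rate). */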
3323 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3324 : apv == 88 ? sizeof(struct p_rs_param)
3325 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003326 : apv <= 94 ? sizeof(struct p_rs_param_89)
3327 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003328
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003329 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003330 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003331 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003332 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003333 }
3334
3335 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003336 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003337 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003338 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003339 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003340 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003342 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003343 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003344 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003345 D_ASSERT(data_size == 0);
3346 }
3347
3348 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003349 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003350 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3351
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003352 err = drbd_recv_all(mdev->tconn, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003353 if (err)
3354 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003355
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003356 mutex_lock(&mdev->tconn->conf_update);
3357 old_net_conf = mdev->tconn->net_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003358 if (get_ldev(mdev)) {
3359 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3360 if (!new_disk_conf) {
3361 put_ldev(mdev);
3362 mutex_unlock(&mdev->tconn->conf_update);
3363 dev_err(DEV, "Allocation of new disk_conf failed\n");
3364 return -ENOMEM;
3365 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003366
Philipp Reisner813472c2011-05-03 16:47:02 +02003367 old_disk_conf = mdev->ldev->disk_conf;
3368 *new_disk_conf = *old_disk_conf;
3369
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003370 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003371 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003372
3373 if (apv >= 88) {
3374 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003375 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3376 dev_err(DEV, "verify-alg of wrong size, "
3377 "peer wants %u, accepting only up to %u byte\n",
3378 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003379 err = -EIO;
3380 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003381 }
3382
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003383 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003384 if (err)
3385 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003386 /* we expect NUL terminated string */
3387 /* but just in case someone tries to be evil */
3388 D_ASSERT(p->verify_alg[data_size-1] == 0);
3389 p->verify_alg[data_size-1] = 0;
3390
3391 } else /* apv >= 89 */ {
3392 /* we still expect NUL terminated strings */
3393 /* but just in case someone tries to be evil */
3394 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3395 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3396 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3397 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3398 }
3399
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003400 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003401 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3402 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003403 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003404 goto disconnect;
3405 }
3406 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3407 p->verify_alg, "verify-alg");
3408 if (IS_ERR(verify_tfm)) {
3409 verify_tfm = NULL;
3410 goto disconnect;
3411 }
3412 }
3413
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003414 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3416 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003417 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003418 goto disconnect;
3419 }
3420 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3421 p->csums_alg, "csums-alg");
3422 if (IS_ERR(csums_tfm)) {
3423 csums_tfm = NULL;
3424 goto disconnect;
3425 }
3426 }
3427
Philipp Reisner813472c2011-05-03 16:47:02 +02003428 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003429 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3430 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3431 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3432 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003433
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003434 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner9958c852011-05-03 16:19:31 +02003435 if (fifo_size != mdev->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003436 new_plan = fifo_alloc(fifo_size);
3437 if (!new_plan) {
Philipp Reisner778f2712010-07-06 11:14:00 +02003438				dev_err(DEV, "kmalloc of fifo_buffer failed\n");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003439 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003440 goto disconnect;
3441 }
3442 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003443 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003444
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003445 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003446 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3447 if (!new_net_conf) {
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003448 dev_err(DEV, "Allocation of new net_conf failed\n");
3449 goto disconnect;
3450 }
3451
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003452 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003453
3454 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003455 strcpy(new_net_conf->verify_alg, p->verify_alg);
3456 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003457 crypto_free_hash(mdev->tconn->verify_tfm);
3458 mdev->tconn->verify_tfm = verify_tfm;
3459 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3460 }
3461 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003462 strcpy(new_net_conf->csums_alg, p->csums_alg);
3463 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003464 crypto_free_hash(mdev->tconn->csums_tfm);
3465 mdev->tconn->csums_tfm = csums_tfm;
3466 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3467 }
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003468 rcu_assign_pointer(tconn->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003469 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003470 }
3471
Philipp Reisner813472c2011-05-03 16:47:02 +02003472 if (new_disk_conf) {
3473 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3474 put_ldev(mdev);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003475 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003476
3477 if (new_plan) {
3478 old_plan = mdev->rs_plan_s;
3479 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
3480 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003481
3482 mutex_unlock(&mdev->tconn->conf_update);
3483 synchronize_rcu();
3484 if (new_net_conf)
3485 kfree(old_net_conf);
3486 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003487 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003488
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003489 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003490
Philipp Reisner813472c2011-05-03 16:47:02 +02003491reconnect:
3492 if (new_disk_conf) {
3493 put_ldev(mdev);
3494 kfree(new_disk_conf);
3495 }
3496 mutex_unlock(&mdev->tconn->conf_update);
3497 return -EIO;
3498
Philipp Reisnerb411b362009-09-25 16:07:19 -07003499disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003500 kfree(new_plan);
3501 if (new_disk_conf) {
3502 put_ldev(mdev);
3503 kfree(new_disk_conf);
3504 }
Philipp Reisnera0095502011-05-03 13:14:15 +02003505 mutex_unlock(&mdev->tconn->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003506 /* just for completeness: actually not needed,
3507 * as this is not reached if csums_tfm was ok. */
3508 crypto_free_hash(csums_tfm);
3509 /* but free the verify_tfm again, if csums_tfm did not work out */
3510 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003511 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003512 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003513}
3514
Philipp Reisnerb411b362009-09-25 16:07:19 -07003515/* warn if the arguments differ by more than 12.5% */
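/* e.g. a = 1000 and b = 900 sectors differ by 100, which is within both
 * a>>3 = 125 and b>>3 = 112, so no warning is emitted. */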
3516static void warn_if_differ_considerably(struct drbd_conf *mdev,
3517 const char *s, sector_t a, sector_t b)
3518{
3519 sector_t d;
3520 if (a == 0 || b == 0)
3521 return;
3522 d = (a > b) ? (a - b) : (b - a);
3523 if (d > (a>>3) || d > (b>>3))
3524 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3525 (unsigned long long)a, (unsigned long long)b);
3526}
3527
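/* Handle a P_SIZES packet: remember the peer's backing device size and
 * requested user size, possibly adopt a new user size, re-determine our
 * own device size, and kick off a resync after an online grow if needed. */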
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003528static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003530 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003531 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003532 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003533 sector_t p_size, p_usize, my_usize;
3534 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003535 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003537 mdev = vnr_to_mdev(tconn, pi->vnr);
3538 if (!mdev)
3539 return config_unknown_volume(tconn, pi);
3540
Philipp Reisnerb411b362009-09-25 16:07:19 -07003541 p_size = be64_to_cpu(p->d_size);
3542 p_usize = be64_to_cpu(p->u_size);
3543
Philipp Reisnerb411b362009-09-25 16:07:19 -07003544 /* just store the peer's disk size for now.
3545 * we still need to figure out whether we accept that. */
3546 mdev->p_size = p_size;
3547
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548 if (get_ldev(mdev)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003549 rcu_read_lock();
3550 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3551 rcu_read_unlock();
3552
Philipp Reisnerb411b362009-09-25 16:07:19 -07003553 warn_if_differ_considerably(mdev, "lower level device sizes",
3554 p_size, drbd_get_max_capacity(mdev->ldev));
3555 warn_if_differ_considerably(mdev, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003556 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003557
3558 /* if this is the first connect, or an otherwise expected
3559 * param exchange, choose the minimum */
3560 if (mdev->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003561 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003562
3563 /* Never shrink a device with usable data during connect.
3564 But allow online shrinking if we are connected. */
Philipp Reisneref5e44a2011-05-03 13:27:43 +02003565 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003566 drbd_get_capacity(mdev->this_bdev) &&
3567 mdev->state.disk >= D_OUTDATED &&
3568 mdev->state.conn < C_CONNECTED) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003569 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003570 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003571 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003572 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003573 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003574
3575 if (my_usize != p_usize) {
3576 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3577
3578 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3579 if (!new_disk_conf) {
3580 dev_err(DEV, "Allocation of new disk_conf failed\n");
3581 put_ldev(mdev);
3582 return -ENOMEM;
3583 }
3584
3585 mutex_lock(&mdev->tconn->conf_update);
3586 old_disk_conf = mdev->ldev->disk_conf;
3587 *new_disk_conf = *old_disk_conf;
3588 new_disk_conf->disk_size = p_usize;
3589
3590 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3591 mutex_unlock(&mdev->tconn->conf_update);
3592 synchronize_rcu();
3593 kfree(old_disk_conf);
3594
3595 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
 3596 				 (unsigned long)p_usize);
3597 }
3598
Philipp Reisnerb411b362009-09-25 16:07:19 -07003599 put_ldev(mdev);
3600 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003601
Philipp Reisnere89b5912010-03-24 17:11:33 +01003602 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003603 if (get_ldev(mdev)) {
Philipp Reisnerd752b262013-06-25 16:50:08 +02003604 dd = drbd_determine_dev_size(mdev, ddsf, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003605 put_ldev(mdev);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003606 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003607 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003608 drbd_md_sync(mdev);
3609 } else {
3610 /* I am diskless, need to accept the peer's size. */
3611 drbd_set_my_capacity(mdev, p_size);
3612 }
3613
Philipp Reisner99432fc2011-05-20 16:39:13 +02003614 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3615 drbd_reconsider_max_bio_size(mdev);
3616
Philipp Reisnerb411b362009-09-25 16:07:19 -07003617 if (get_ldev(mdev)) {
3618 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3619 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3620 ldsc = 1;
3621 }
3622
Philipp Reisnerb411b362009-09-25 16:07:19 -07003623 put_ldev(mdev);
3624 }
3625
3626 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3627 if (be64_to_cpu(p->c_size) !=
3628 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3629 /* we have different sizes, probably peer
3630 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003631 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003632 }
3633 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
Philipp Reisnere96c9632013-06-25 16:50:07 +02003634 (dd == DS_GREW && mdev->state.conn == C_CONNECTED)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003635 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003636 mdev->state.disk >= D_INCONSISTENT) {
3637 if (ddsf & DDSF_NO_RESYNC)
3638 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3639 else
3640 resync_after_online_grow(mdev);
3641 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3643 }
3644 }
3645
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003646 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647}
3648
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003649static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003650{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003651 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003652 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003654 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003655
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003656 mdev = vnr_to_mdev(tconn, pi->vnr);
3657 if (!mdev)
3658 return config_unknown_volume(tconn, pi);
3659
Philipp Reisnerb411b362009-09-25 16:07:19 -07003660 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003661 if (!p_uuid) {
3662 dev_err(DEV, "kmalloc of p_uuid failed\n");
3663 return false;
3664 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003665
3666 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3667 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3668
3669 kfree(mdev->p_uuid);
3670 mdev->p_uuid = p_uuid;
3671
3672 if (mdev->state.conn < C_CONNECTED &&
3673 mdev->state.disk < D_INCONSISTENT &&
3674 mdev->state.role == R_PRIMARY &&
3675 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3676 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3677 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003678 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003679 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003680 }
3681
3682 if (get_ldev(mdev)) {
3683 int skip_initial_sync =
3684 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003685 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003686 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3687 (p_uuid[UI_FLAGS] & 8);
3688 if (skip_initial_sync) {
3689 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3690 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003691 "clear_n_write from receive_uuids",
3692 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003693 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3694 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3695 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3696 CS_VERBOSE, NULL);
3697 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003698 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699 }
3700 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003701 } else if (mdev->state.disk < D_INCONSISTENT &&
3702 mdev->state.role == R_PRIMARY) {
3703 /* I am a diskless primary, the peer just created a new current UUID
3704 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003705 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706 }
3707
3708 /* Before we test for the disk state, we should wait until an eventually
3709 ongoing cluster wide state change is finished. That is important if
3710 we are primary and are detaching from our disk. We need to see the
3711 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003712 mutex_lock(mdev->state_mutex);
3713 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003714 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003715 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3716
3717 if (updated_uuids)
3718 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003719
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003720 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003721}
3722
3723/**
3724 * convert_state() - Converts the peer's view of the cluster state to our point of view
3725 * @ps: The state as seen by the peer.
3726 */
3727static union drbd_state convert_state(union drbd_state ps)
3728{
3729 union drbd_state ms;
3730
3731 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003732 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733 [C_CONNECTED] = C_CONNECTED,
3734
3735 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3736 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3737 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3738 [C_VERIFY_S] = C_VERIFY_T,
3739 [C_MASK] = C_MASK,
3740 };
3741
3742 ms.i = ps.i;
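	/* Mirror the peer's view: its role/disk become our peer/pdsk and vice
	 * versa; asymmetric connection states are swapped via c_tab above. */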
3743
3744 ms.conn = c_tab[ps.conn];
3745 ms.peer = ps.role;
3746 ms.role = ps.peer;
3747 ms.pdsk = ps.disk;
3748 ms.disk = ps.pdsk;
3749 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3750
3751 return ms;
3752}
3753
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003754static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003755{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003756 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003757 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003758 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003759 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003761 mdev = vnr_to_mdev(tconn, pi->vnr);
3762 if (!mdev)
3763 return -EIO;
3764
Philipp Reisnerb411b362009-09-25 16:07:19 -07003765 mask.i = be32_to_cpu(p->mask);
3766 val.i = be32_to_cpu(p->val);
3767
Lars Ellenberg427c0432012-08-01 12:43:01 +02003768 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003769 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003771 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772 }
3773
3774 mask = convert_state(mask);
3775 val = convert_state(val);
3776
3777 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003778 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003779
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780 drbd_md_sync(mdev);
3781
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003782 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003783}
3784
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003785static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003786{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003787 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003788 union drbd_state mask, val;
3789 enum drbd_state_rv rv;
3790
3791 mask.i = be32_to_cpu(p->mask);
3792 val.i = be32_to_cpu(p->val);
3793
Lars Ellenberg427c0432012-08-01 12:43:01 +02003794 if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003795 mutex_is_locked(&tconn->cstate_mutex)) {
3796 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003797 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003798 }
3799
3800 mask = convert_state(mask);
3801 val = convert_state(val);
3802
Philipp Reisner778bcf22011-03-28 12:55:03 +02003803 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003804 conn_send_sr_reply(tconn, rv);
3805
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003806 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003807}
3808
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003809static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003810{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003811 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003812 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003813 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003815 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003816 int rv;
3817
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003818 mdev = vnr_to_mdev(tconn, pi->vnr);
3819 if (!mdev)
3820 return config_unknown_volume(tconn, pi);
3821
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822 peer_state.i = be32_to_cpu(p->state);
3823
3824 real_peer_disk = peer_state.disk;
3825 if (peer_state.disk == D_NEGOTIATING) {
3826 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3827 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3828 }
3829
Philipp Reisner87eeee42011-01-19 14:16:30 +01003830 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831 retry:
Philipp Reisner78bae592011-03-28 15:40:12 +02003832 os = ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003833 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834
Lars Ellenberg545752d2011-12-05 14:39:25 +01003835 /* If some other part of the code (asender thread, timeout)
3836 * already decided to close the connection again,
3837 * we must not "re-establish" it here. */
3838 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003839 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003840
Lars Ellenberg40424e42011-09-26 15:24:56 +02003841 /* If this is the "end of sync" confirmation, usually the peer disk
3842 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3843 * set) resync started in PausedSyncT, or if the timing of pause-/
3844 * unpause-sync events has been "just right", the peer disk may
3845 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3846 */
3847 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3848 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003849 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3850 /* If we are (becoming) SyncSource, but peer is still in sync
3851 * preparation, ignore its uptodate-ness to avoid flapping, it
3852 * will change to inconsistent once the peer reaches active
3853 * syncing states.
3854 * It may have changed syncer-paused flags, however, so we
3855 * cannot ignore this completely. */
3856 if (peer_state.conn > C_CONNECTED &&
3857 peer_state.conn < C_SYNC_SOURCE)
3858 real_peer_disk = D_INCONSISTENT;
3859
3860 /* if peer_state changes to connected at the same time,
3861 * it explicitly notifies us that it finished resync.
3862 * Maybe we should finish it up, too? */
3863 else if (os.conn >= C_SYNC_SOURCE &&
3864 peer_state.conn == C_CONNECTED) {
3865 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3866 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003867 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003868 }
3869 }
3870
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003871 /* explicit verify finished notification, stop sector reached. */
3872 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3873 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003874 ov_out_of_sync_print(mdev);
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003875 drbd_resync_finished(mdev);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003876 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003877 }
3878
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003879 /* peer says his disk is inconsistent, while we think it is uptodate,
3880 * and this happens while the peer still thinks we have a sync going on,
3881 * but we think we are already done with the sync.
3882 * We ignore this to avoid flapping pdsk.
3883 * This should not happen, if the peer is a recent version of drbd. */
3884 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3885 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3886 real_peer_disk = D_UP_TO_DATE;
3887
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003888 if (ns.conn == C_WF_REPORT_PARAMS)
3889 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890
Philipp Reisner67531712010-10-27 12:21:30 +02003891 if (peer_state.conn == C_AHEAD)
3892 ns.conn = C_BEHIND;
3893
Philipp Reisnerb411b362009-09-25 16:07:19 -07003894 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3895 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3896 int cr; /* consider resync */
3897
3898 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003899 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003900 /* if we had an established connection
3901 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003902 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003903 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003904 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003905 /* if we have both been inconsistent, and the peer has been
3906 * forced to be UpToDate with --overwrite-data */
3907 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3908 /* if we had been plain connected, and the admin requested to
3909 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003910 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911 (peer_state.conn >= C_STARTING_SYNC_S &&
3912 peer_state.conn <= C_WF_BITMAP_T));
3913
3914 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003915 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003916
3917 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003918 if (ns.conn == C_MASK) {
3919 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003920 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003921 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003922 } else if (peer_state.disk == D_NEGOTIATING) {
3923 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3924 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003925 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003927 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003928 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003929 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003930 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003931 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932 }
3933 }
3934 }
3935
Philipp Reisner87eeee42011-01-19 14:16:30 +01003936 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisner78bae592011-03-28 15:40:12 +02003937 if (os.i != drbd_read_state(mdev).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938 goto retry;
3939 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003940 ns.peer = peer_state.role;
3941 ns.pdsk = real_peer_disk;
3942 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003943 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003944 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003945 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Philipp Reisner2aebfab2011-03-28 16:48:11 +02003946 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003947 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003948 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003949		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003950 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003951 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003952 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003953 drbd_uuid_new_current(mdev);
3954 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003955 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003956 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003957 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003958 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisner78bae592011-03-28 15:40:12 +02003959 ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003960 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003961
3962 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003963 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003964 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965 }
3966
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003967 if (os.conn > C_WF_REPORT_PARAMS) {
3968 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003969 peer_state.disk != D_NEGOTIATING ) {
3970 /* we want resync, peer has not yet decided to sync... */
3971 /* Nowadays only used when forcing a node into primary role and
3972 setting its disk to UpToDate with that */
3973 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02003974 drbd_send_current_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003975 }
3976 }
3977
Philipp Reisner08b165b2011-09-05 16:22:33 +02003978 clear_bit(DISCARD_MY_DATA, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979
Lars Ellenbergcccac982013-03-19 18:16:46 +01003980 drbd_md_sync(mdev); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003982 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003983}
3984
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003985static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003986{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003987 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003988 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003989
3990 mdev = vnr_to_mdev(tconn, pi->vnr);
3991 if (!mdev)
3992 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993
3994 wait_event(mdev->misc_wait,
3995 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003996 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003997 mdev->state.conn < C_CONNECTED ||
3998 mdev->state.disk < D_NEGOTIATING);
3999
4000 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
4001
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002 /* Here the _drbd_uuid_ functions are right, current should
4003 _not_ be rotated into the history */
4004 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
4005 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
4006 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
4007
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004008 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 drbd_start_resync(mdev, C_SYNC_TARGET);
4010
4011 put_ldev(mdev);
4012 } else
4013 dev_err(DEV, "Ignoring SyncUUID packet!\n");
4014
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004015 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004016}
4017
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004018/**
4019 * receive_bitmap_plain
4020 *
4021 * Return 0 when done, 1 when another iteration is needed, and a negative error
4022 * code upon failure.
4023 */
4024static int
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004025receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004026 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004027{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004028 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
4029 drbd_header_size(mdev->tconn);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004030 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004031 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004032 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004033 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004035 if (want != size) {
4036 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004037 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004038 }
4039 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004040 return 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004041 err = drbd_recv_all(mdev->tconn, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004042 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004043 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004045 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046
4047 c->word_offset += num_words;
4048 c->bit_offset = c->word_offset * BITS_PER_LONG;
4049 if (c->bit_offset > c->bm_bits)
4050 c->bit_offset = c->bm_bits;
4051
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004052 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004053}
4054
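/* Layout of the "encoding" byte of a compressed bitmap packet, as used by
 * the dcbp_* helpers below: bits 0-3 carry the bitmap code (RLE_VLI_Bits),
 * bits 4-6 the number of pad bits, and bit 7 whether the first run
 * describes set bits. */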
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004055static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4056{
4057 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4058}
4059
4060static int dcbp_get_start(struct p_compressed_bm *p)
4061{
4062 return (p->encoding & 0x80) != 0;
4063}
4064
4065static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4066{
4067 return (p->encoding >> 4) & 0x7;
4068}
4069
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004070/**
4071 * recv_bm_rle_bits
4072 *
4073 * Return 0 when done, 1 when another iteration is needed, and a negative error
4074 * code upon failure.
4075 */
4076static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004077recv_bm_rle_bits(struct drbd_conf *mdev,
4078 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004079 struct bm_xfer_ctx *c,
4080 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081{
4082 struct bitstream bs;
4083 u64 look_ahead;
4084 u64 rl;
4085 u64 tmp;
4086 unsigned long s = c->bit_offset;
4087 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004088 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089 int have;
4090 int bits;
4091
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004092 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004093
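	/* Runs alternate between clear and set ranges; each VLI decoded run
	 * length rl advances the bit offset s, and set runs are applied to
	 * the bitmap via _drbd_bm_set_bits(). The initial polarity comes
	 * from dcbp_get_start(p). */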
4094 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4095 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004096 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004097
4098 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4099 bits = vli_decode_bits(&rl, look_ahead);
4100 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004101 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004102
4103 if (toggle) {
4104 e = s + rl -1;
4105 if (e >= c->bm_bits) {
4106 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004107 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004108 }
4109 _drbd_bm_set_bits(mdev, s, e);
4110 }
4111
4112 if (have < bits) {
4113 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4114 have, bits, look_ahead,
4115 (unsigned int)(bs.cur.b - p->code),
4116 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004117 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004119 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4120 if (likely(bits < 64))
4121 look_ahead >>= bits;
4122 else
4123 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004124 have -= bits;
4125
4126 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4127 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004128 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 look_ahead |= tmp << have;
4130 have += bits;
4131 }
4132
4133 c->bit_offset = s;
4134 bm_xfer_ctx_bit_to_word_offset(c);
4135
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004136 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004137}
4138
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004139/**
4140 * decode_bitmap_c
4141 *
4142 * Return 0 when done, 1 when another iteration is needed, and a negative error
4143 * code upon failure.
4144 */
4145static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004146decode_bitmap_c(struct drbd_conf *mdev,
4147 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004148 struct bm_xfer_ctx *c,
4149 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004150{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004151 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004152 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004153
4154 /* other variants had been implemented for evaluation,
4155 * but have been dropped as this one turned out to be "best"
4156 * during all our tests. */
4157
4158 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01004159 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004160 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004161}
4162
4163void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4164 const char *direction, struct bm_xfer_ctx *c)
4165{
4166 /* what would it take to transfer it "plaintext" */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004167 unsigned int header_size = drbd_header_size(mdev->tconn);
4168 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4169 unsigned int plain =
4170 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4171 c->bm_words * sizeof(unsigned long);
4172 unsigned int total = c->bytes[0] + c->bytes[1];
4173 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004174
4175 /* total can not be zero. but just in case: */
4176 if (total == 0)
4177 return;
4178
4179 /* don't report if not compressed */
4180 if (total >= plain)
4181 return;
4182
4183 /* total < plain. check for overflow, still */
4184 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4185 : (1000 * total / plain);
4186
4187 if (r > 1000)
4188 r = 1000;
4189
4190 r = 1000 - r;
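	/* r is now the saved fraction in per mille, e.g. total = 200 and
	 * plain = 1000 gives r = 1000 - 200 = 800, reported as "80.0%". */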
4191 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4192 "total %u; compression: %u.%u%%\n",
4193 direction,
4194 c->bytes[1], c->packets[1],
4195 c->bytes[0], c->packets[0],
4196 total, r/10, r % 10);
4197}
4198
4199/* Since we are processing the bitfield from lower addresses to higher,
 4200   it does not matter whether we process it in 32 bit chunks or 64 bit
 4201   chunks as long as it is little endian. (Understand it as a byte stream,
 4202   beginning with the lowest byte...) If we used big endian instead,
 4203   we would need to process it from the highest address to the lowest,
 4204   in order to be agnostic to the 32 vs 64 bits issue.
 4205
 4206   Returns 0 on success, a negative error code otherwise. */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004207static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004208{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004209 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004211 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004212
4213 mdev = vnr_to_mdev(tconn, pi->vnr);
4214 if (!mdev)
4215 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004217 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
4218 /* you are supposed to send additional out-of-sync information
4219 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004220
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221 c = (struct bm_xfer_ctx) {
4222 .bm_bits = drbd_bm_bits(mdev),
4223 .bm_words = drbd_bm_words(mdev),
4224 };
4225
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004226 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004227 if (pi->cmd == P_BITMAP)
4228 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
4229 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004230 /* MAYBE: sanity check that we speak proto >= 90,
4231 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004232 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004233
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004234 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004235 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004236 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237 goto out;
4238 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004239 if (pi->size <= sizeof(*p)) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004240 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004241 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004242 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004244 err = drbd_recv_all(mdev->tconn, p, pi->size);
4245 if (err)
4246 goto out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004247 err = decode_bitmap_c(mdev, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004249 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004250 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004251 goto out;
4252 }
4253
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004254 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004255 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004257 if (err <= 0) {
4258 if (err < 0)
4259 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004260 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004261 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004262 err = drbd_recv_header(mdev->tconn, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004263 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004265 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266
4267 INFO_bm_xfer_stats(mdev, "receive", &c);
4268
4269 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004270 enum drbd_state_rv rv;
4271
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004272 err = drbd_send_bitmap(mdev);
4273 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274 goto out;
4275 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004276 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
4277 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278 } else if (mdev->state.conn != C_WF_BITMAP_S) {
4279 /* admin may have requested C_DISCONNECTING,
4280 * other threads may have noticed network errors */
4281 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
4282 drbd_conn_str(mdev->state.conn));
4283 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004284 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285
Philipp Reisnerb411b362009-09-25 16:07:19 -07004286 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004287 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004288 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004289 drbd_start_resync(mdev, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004290 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004291}
4292
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004293static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004294{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004295 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004296 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004298 return ignore_remaining_packet(tconn, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004299}
4300
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004301static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303 /* Make sure we've acked all the TCP data associated
4304 * with the data requests being unplugged */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004305 drbd_tcp_quickack(tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004307 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308}
4309
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004310static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004311{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004312 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004313 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004314
4315 mdev = vnr_to_mdev(tconn, pi->vnr);
4316 if (!mdev)
4317 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004318
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004319 switch (mdev->state.conn) {
4320 case C_WF_SYNC_UUID:
4321 case C_WF_BITMAP_T:
4322 case C_BEHIND:
4323 break;
4324 default:
4325 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4326 drbd_conn_str(mdev->state.conn));
4327 }
4328
Philipp Reisner73a01a12010-10-27 14:33:00 +02004329 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4330
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004331 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004332}
4333
Philipp Reisner02918be2010-08-20 14:35:10 +02004334struct data_cmd {
4335 int expect_payload;
4336 size_t pkt_size;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004337 int (*fn)(struct drbd_tconn *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338};
4339
Philipp Reisner02918be2010-08-20 14:35:10 +02004340static struct data_cmd drbd_cmd_handler[] = {
4341 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4342 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4343 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4344 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004345 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4346 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4347 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004348 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4349 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004350 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4351 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004352 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4353 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4354 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4355 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4356 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4357 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4358 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4359 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4360 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4361 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004362 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004363 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004364 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004365};
4366
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004367static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004368{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004369 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004370 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004371 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004373 while (get_t_state(&tconn->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004374 struct data_cmd *cmd;
4375
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004376 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004377 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004378 goto err_out;
4379
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004380 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004381 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004382 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4383 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004384 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004385 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004386
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004387 shs = cmd->pkt_size;
4388 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004389 conn_err(tconn, "No payload expected %s l:%d\n",
4390 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004391 goto err_out;
4392 }
4393
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004394 if (shs) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004395 err = drbd_recv_all_warn(tconn, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004396 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004397 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004398 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004399 }
4400
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004401 err = cmd->fn(tconn, &pi);
4402 if (err) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004403 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
4404 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004405 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004408 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004410 err_out:
4411 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004412}
4413
Philipp Reisner0e29d162011-02-18 14:23:11 +01004414void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415{
4416 struct drbd_wq_barrier barr;
4417
4418 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004419 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004420 init_completion(&barr.done);
Lars Ellenbergd5b27b02011-11-14 15:42:37 +01004421 drbd_queue_work(&tconn->sender_work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422 wait_for_completion(&barr.done);
4423}
4424
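/* Tear down a lost connection: stop the asender, close the sockets, run
 * per-volume cleanup via drbd_disconnected(), and move the connection
 * state towards C_UNCONNECTED (or C_STANDALONE if we were disconnecting). */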
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004425static void conn_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004426{
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004427 struct drbd_conf *mdev;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004428 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004429 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004430
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004431 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433
Lars Ellenberg545752d2011-12-05 14:39:25 +01004434 /* We are about to start the cleanup after connection loss.
4435 * Make sure drbd_make_request knows about that.
4436 * Usually we should be in some network failure state already,
4437 * but just in case we are not, we fix it up here.
4438 */
Philipp Reisnerb8853db2011-12-13 11:09:16 +01004439 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004440
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004442 drbd_thread_stop(&tconn->asender);
4443 drbd_free_sock(tconn);
4444
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004445 rcu_read_lock();
4446 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4447 kref_get(&mdev->kref);
4448 rcu_read_unlock();
4449 drbd_disconnected(mdev);
4450 kref_put(&mdev->kref, &drbd_minor_destroy);
4451 rcu_read_lock();
4452 }
4453 rcu_read_unlock();
4454
Philipp Reisner12038a32011-11-09 19:18:00 +01004455 if (!list_empty(&tconn->current_epoch->list))
4456 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4457 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4458 atomic_set(&tconn->current_epoch->epoch_size, 0);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01004459 tconn->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004460
Philipp Reisner360cc742011-02-08 14:29:53 +01004461 conn_info(tconn, "Connection closed\n");
4462
Philipp Reisnercb703452011-03-24 11:03:07 +01004463 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4464 conn_try_outdate_peer_async(tconn);
4465
Philipp Reisner360cc742011-02-08 14:29:53 +01004466 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004467 oc = tconn->cstate;
4468 if (oc >= C_UNCONNECTED)
Philipp Reisner376694a2011-11-07 10:54:28 +01004469 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004470
Philipp Reisner360cc742011-02-08 14:29:53 +01004471 spin_unlock_irq(&tconn->req_lock);
4472
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004473 if (oc == C_DISCONNECTING)
Lars Ellenbergd9cc6e22011-04-27 10:25:28 +02004474 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004475}
4476
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004477static int drbd_disconnected(struct drbd_conf *mdev)
Philipp Reisner360cc742011-02-08 14:29:53 +01004478{
Philipp Reisner360cc742011-02-08 14:29:53 +01004479 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480
Philipp Reisner85719572010-07-21 10:20:17 +02004481 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004482 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004483 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4484 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4485 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004486 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487
4488 /* We do not have data structures that would allow us to
4489 * get the rs_pending_cnt down to 0 again.
4490 * * On C_SYNC_TARGET we do not have any data structures describing
4491 * the pending RSDataRequest's we have sent.
4492 * * On C_SYNC_SOURCE there is no data structure that tracks
4493 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4494 * And no, it is not the sum of the reference counts in the
4495 * resync_LRU. The resync_LRU tracks the whole operation including
4496 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4497 * on the fly. */
4498 drbd_rs_cancel_all(mdev);
4499 mdev->rs_total = 0;
4500 mdev->rs_failed = 0;
4501 atomic_set(&mdev->rs_pending_cnt, 0);
4502 wake_up(&mdev->misc_wait);
4503
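	/* Stop the resync timer; calling its function directly makes sure any
	 * work it would have queued is queued now, so that the workqueue flush
	 * below can "cancel" it as well. */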
Philipp Reisnerb411b362009-09-25 16:07:19 -07004504 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004505 resync_timer_fn((unsigned long)mdev);
4506
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4508 * w_make_resync_request etc. which may still be on the worker queue
4509 * to be "canceled" */
4510 drbd_flush_workqueue(mdev);
4511
Andreas Gruenbachera990be42011-04-06 17:56:48 +02004512 drbd_finish_peer_reqs(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004513
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004514 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4515	   might have queued work again. The one before drbd_finish_peer_reqs() is
4516	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4517 drbd_flush_workqueue(mdev);
4518
Lars Ellenberg08332d72012-08-17 15:09:13 +02004519 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4520 * again via drbd_try_clear_on_disk_bm(). */
4521 drbd_rs_cancel_all(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004522
4523 kfree(mdev->p_uuid);
4524 mdev->p_uuid = NULL;
4525
Philipp Reisner2aebfab2011-03-28 16:48:11 +02004526 if (!drbd_suspended(mdev))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004527 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004528
4529 drbd_md_sync(mdev);
4530
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004531 /* serialize with bitmap writeout triggered by the state change,
4532 * if any. */
4533 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4534
Philipp Reisnerb411b362009-09-25 16:07:19 -07004535 /* tcp_close and release of sendpage pages can be deferred. I don't
4536 * want to use SO_LINGER, because apparently it can be deferred for
4537 * more than 20 seconds (longest time I checked).
4538 *
4539 * Actually we don't care for exactly when the network stack does its
4540 * put_page(), but release our reference on these pages right here.
4541 */
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02004542 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004543 if (i)
4544 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004545 i = atomic_read(&mdev->pp_in_use_by_net);
4546 if (i)
4547 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004548 i = atomic_read(&mdev->pp_in_use);
4549 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004550 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004551
4552 D_ASSERT(list_empty(&mdev->read_ee));
4553 D_ASSERT(list_empty(&mdev->active_ee));
4554 D_ASSERT(list_empty(&mdev->sync_ee));
4555 D_ASSERT(list_empty(&mdev->done_ee));
4556
Philipp Reisner360cc742011-02-08 14:29:53 +01004557 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004558}
4559
4560/*
4561 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4562 * we can agree on is stored in agreed_pro_version.
4563 *
4564 * feature flags and the reserved array should be enough room for future
4565 * enhancements of the handshake protocol, and possible plugins...
4566 *
4567 * for now, they are expected to be zero, but ignored.
4568 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004569static int drbd_send_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004570{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004571 struct drbd_socket *sock;
4572 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004573
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004574 sock = &tconn->data;
4575 p = conn_prepare_command(tconn, sock);
4576 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004577 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578 memset(p, 0, sizeof(*p));
4579 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4580 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004581 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582}
4583
4584/*
4585 * return values:
4586 * 1 yes, we have a valid connection
4587 * 0 oops, did not work out, please try again
4588 * -1 peer talks different language,
4589 * no point in trying again, please go standalone.
4590 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004591static int drbd_do_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004593 /* ASSERT current == tconn->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004594 struct p_connection_features *p;
4595 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004596 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004597 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004598
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004599 err = drbd_send_features(tconn);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004600 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004601 return 0;
4602
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004603 err = drbd_recv_header(tconn, &pi);
4604 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004605 return 0;
4606
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004607 if (pi.cmd != P_CONNECTION_FEATURES) {
4608 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004609 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004610 return -1;
4611 }
4612
Philipp Reisner77351055b2011-02-07 17:24:26 +01004613 if (pi.size != expect) {
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004614 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004615 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004616 return -1;
4617 }
4618
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004619 p = pi.data;
4620 err = drbd_recv_all_warn(tconn, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004621 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004623
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624 p->protocol_min = be32_to_cpu(p->protocol_min);
4625 p->protocol_max = be32_to_cpu(p->protocol_max);
4626 if (p->protocol_max == 0)
4627 p->protocol_max = p->protocol_min;
4628
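	/* Agree only if our [PRO_VERSION_MIN, PRO_VERSION_MAX] range overlaps
	 * the peer's advertised [protocol_min, protocol_max] range. */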
4629 if (PRO_VERSION_MAX < p->protocol_min ||
4630 PRO_VERSION_MIN > p->protocol_max)
4631 goto incompat;
4632
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004633 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004635 conn_info(tconn, "Handshake successful: "
4636 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637
4638 return 1;
4639
4640 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004641 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004642 "I support %d-%d, peer supports %d-%d\n",
4643 PRO_VERSION_MIN, PRO_VERSION_MAX,
4644 p->protocol_min, p->protocol_max);
4645 return -1;
4646}
4647
4648#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004649static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650{
Philipp Reisneref57f9e2013-03-27 14:08:44 +01004651	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4652 conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004653 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654}
4655#else
4656#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004657
4658/* Return value:
4659 1 - auth succeeded,
4660 0 - failed, try again (network error),
4661 -1 - auth failed, don't try again.
4662*/
4663
Philipp Reisner13e60372011-02-08 09:54:40 +01004664static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004665{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004666 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4668 struct scatterlist sg;
4669 char *response = NULL;
4670 char *right_response = NULL;
4671 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004672 unsigned int key_len;
4673 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674 unsigned int resp_size;
4675 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004676 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004677 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004678 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004679
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004680 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4681
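	/* CRAM-HMAC in both directions: send our random challenge, answer the
	 * peer's challenge with an HMAC keyed by the shared secret, then check
	 * the peer's answer to our challenge against the locally computed
	 * digest. */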
Philipp Reisner44ed1672011-04-19 17:10:19 +02004682 rcu_read_lock();
4683 nc = rcu_dereference(tconn->net_conf);
4684 key_len = strlen(nc->shared_secret);
4685 memcpy(secret, nc->shared_secret, key_len);
4686 rcu_read_unlock();
4687
Philipp Reisner13e60372011-02-08 09:54:40 +01004688 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004689 desc.flags = 0;
4690
Philipp Reisner44ed1672011-04-19 17:10:19 +02004691 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004693 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004694 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004695 goto fail;
4696 }
4697
4698 get_random_bytes(my_challenge, CHALLENGE_LEN);
4699
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004700 sock = &tconn->data;
4701 if (!conn_prepare_command(tconn, sock)) {
4702 rv = 0;
4703 goto fail;
4704 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004705 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004706 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004707 if (!rv)
4708 goto fail;
4709
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004710 err = drbd_recv_header(tconn, &pi);
4711 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004712 rv = 0;
4713 goto fail;
4714 }
4715
Philipp Reisner77351055b2011-02-07 17:24:26 +01004716 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004717 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004718 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719 rv = 0;
4720 goto fail;
4721 }
4722
Philipp Reisner77351055b2011-02-07 17:24:26 +01004723 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004724		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004725 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004726 goto fail;
4727 }
4728
Philipp Reisner77351055b2011-02-07 17:24:26 +01004729 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004730 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004731 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004732 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004733 goto fail;
4734 }
4735
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004736 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4737 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004738 rv = 0;
4739 goto fail;
4740 }
4741
Philipp Reisner13e60372011-02-08 09:54:40 +01004742 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004743 response = kmalloc(resp_size, GFP_NOIO);
4744 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004745 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004746 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004747 goto fail;
4748 }
4749
4750 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004751 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004752
4753 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4754 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004755 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004756 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004757 goto fail;
4758 }
4759
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004760 if (!conn_prepare_command(tconn, sock)) {
4761 rv = 0;
4762 goto fail;
4763 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004764 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004765 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004766 if (!rv)
4767 goto fail;
4768
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004769 err = drbd_recv_header(tconn, &pi);
4770 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004771 rv = 0;
4772 goto fail;
4773 }
4774
Philipp Reisner77351055b2011-02-07 17:24:26 +01004775 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004776 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004777 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004778 rv = 0;
4779 goto fail;
4780 }
4781
Philipp Reisner77351055b2011-02-07 17:24:26 +01004782 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004783		conn_err(tconn, "AuthResponse payload of unexpected size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004784 rv = 0;
4785 goto fail;
4786 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004787
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004788	err = drbd_recv_all_warn(tconn, response, resp_size);
4789 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004790 rv = 0;
4791 goto fail;
4792 }
4793
4794 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004795 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004796 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004797 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004798 goto fail;
4799 }
4800
4801 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4802
4803 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4804 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004805 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004806 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004807 goto fail;
4808 }
4809
4810 rv = !memcmp(response, right_response, resp_size);
4811
4812 if (rv)
Philipp Reisner44ed1672011-04-19 17:10:19 +02004813 conn_info(tconn, "Peer authenticated using %d bytes HMAC\n",
4814 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004815 else
4816 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004817
4818 fail:
4819 kfree(peers_ch);
4820 kfree(response);
4821 kfree(right_response);
4822
4823 return rv;
4824}
4825#endif
4826
4827int drbdd_init(struct drbd_thread *thi)
4828{
Philipp Reisner392c8802011-02-09 10:33:31 +01004829 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004830 int h;
4831
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004832 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004833
4834 do {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004835 h = conn_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004836 if (h == 0) {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004837 conn_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004838 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004839 }
4840 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004841 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004842 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004843 }
4844 } while (h == 0);
4845
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004846 if (h > 0)
4847 drbdd(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004849 conn_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004850
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004851 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004852 return 0;
4853}
4854
4855/* ********* acknowledge sender ******** */
4856
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004857static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004858{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004859 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004860 int retcode = be32_to_cpu(p->retcode);
4861
4862 if (retcode >= SS_SUCCESS) {
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004863 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004864 } else {
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004865 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4866 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4867 drbd_set_st_err_str(retcode), retcode);
4868 }
4869 wake_up(&tconn->ping_wait);
4870
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004871 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004872}
4873
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004874static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004875{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004876 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004877 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004878 int retcode = be32_to_cpu(p->retcode);
4879
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004880 mdev = vnr_to_mdev(tconn, pi->vnr);
4881 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004882 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004883
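	/* Before protocol 100 there is no dedicated connection-wide state
	 * change reply; if a connection-wide change is pending, this reply
	 * answers it. */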
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004884 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4885 D_ASSERT(tconn->agreed_pro_version < 100);
4886 return got_conn_RqSReply(tconn, pi);
4887 }
4888
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004889 if (retcode >= SS_SUCCESS) {
4890 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4891 } else {
4892 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004893 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004894 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004895 }
4896 wake_up(&mdev->state_wait);
4897
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004898 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004899}
4900
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004901static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004902{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004903 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004904
4905}
4906
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004907static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004908{
4909 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004910 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4911 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4912 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004914 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004915}
4916
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004917static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004918{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004919 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004920 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004921 sector_t sector = be64_to_cpu(p->sector);
4922 int blksize = be32_to_cpu(p->blksize);
4923
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004924 mdev = vnr_to_mdev(tconn, pi->vnr);
4925 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004926 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004927
Philipp Reisner31890f42011-01-19 14:12:51 +01004928 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004929
4930 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4931
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004932 if (get_ldev(mdev)) {
4933 drbd_rs_complete_io(mdev, sector);
4934 drbd_set_in_sync(mdev, sector, blksize);
4935 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4936 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4937 put_ldev(mdev);
4938 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004939 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004940 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004941
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004942 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004943}
4944
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004945static int
4946validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4947 struct rb_root *root, const char *func,
4948 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004949{
4950 struct drbd_request *req;
4951 struct bio_and_error m;
4952
Philipp Reisner87eeee42011-01-19 14:16:30 +01004953 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004954 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004955 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004956 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004957 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004958 }
4959 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004960 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004961
4962 if (m.bio)
4963 complete_master_bio(mdev, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004964 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004965}
4966
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004967static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004968{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004969 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004970 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004971 sector_t sector = be64_to_cpu(p->sector);
4972 int blksize = be32_to_cpu(p->blksize);
4973 enum drbd_req_event what;
4974
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004975 mdev = vnr_to_mdev(tconn, pi->vnr);
4976 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004977 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004978
Philipp Reisnerb411b362009-09-25 16:07:19 -07004979 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4980
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004981 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004982 drbd_set_in_sync(mdev, sector, blksize);
4983 dec_rs_pending(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004984 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004985 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004986 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004987 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004988 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004989 break;
4990 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004991 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004992 break;
4993 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004994 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004995 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02004996 case P_SUPERSEDED:
4997 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004998 break;
4999 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005000 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005001 break;
5002 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005003 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005004 }
5005
5006 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005007 &mdev->write_requests, __func__,
5008 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005009}
5010
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005011static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005012{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005013 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005014 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005015 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005016 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005017 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005018
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005019 mdev = vnr_to_mdev(tconn, pi->vnr);
5020 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005021 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005022
5023 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5024
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005025 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005026 dec_rs_pending(mdev);
5027 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005028 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005029 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005030
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005031 err = validate_req_change_req_state(mdev, p->block_id, sector,
5032 &mdev->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005033 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005034 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005035 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5036 The master bio might already be completed, therefore the
5037 request is no longer in the collision hash. */
5038 /* In Protocol B we might already have got a P_RECV_ACK
5039 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005040 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005041 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005042 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005043}
5044
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005045static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005046{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005047 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005048 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005049 sector_t sector = be64_to_cpu(p->sector);
5050
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005051 mdev = vnr_to_mdev(tconn, pi->vnr);
5052 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005053 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005054
Philipp Reisnerb411b362009-09-25 16:07:19 -07005055 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005056
Philipp Reisner380207d2011-11-11 12:31:20 +01005057 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005058 (unsigned long long)sector, be32_to_cpu(p->blksize));
5059
5060 return validate_req_change_req_state(mdev, p->block_id, sector,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005061 &mdev->read_requests, __func__,
5062 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005063}
5064
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005065static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005066{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005067 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005068 sector_t sector;
5069 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005070 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005071
5072 mdev = vnr_to_mdev(tconn, pi->vnr);
5073 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005074 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005075
5076 sector = be64_to_cpu(p->sector);
5077 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005078
5079 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5080
5081 dec_rs_pending(mdev);
5082
5083 if (get_ldev_if_state(mdev, D_FAILED)) {
5084 drbd_rs_complete_io(mdev, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005085 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005086 case P_NEG_RS_DREPLY:
5087 drbd_rs_failed_io(mdev, sector, size);
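		/* fall through */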
5088 case P_RS_CANCEL:
5089 break;
5090 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005091 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005092 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005093 put_ldev(mdev);
5094 }
5095
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005096 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005097}
5098
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005099static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005100{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005101 struct p_barrier_ack *p = pi->data;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005102 struct drbd_conf *mdev;
5103 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005104
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005105 tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005106
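	/* Volumes that went Ahead and have no application writes in flight
	 * may now arm the timer that switches them back to a resync source. */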
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005107 rcu_read_lock();
5108 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5109 if (mdev->state.conn == C_AHEAD &&
5110 atomic_read(&mdev->ap_in_flight) == 0 &&
5111 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
5112 mdev->start_resync_timer.expires = jiffies + HZ;
5113 add_timer(&mdev->start_resync_timer);
5114 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005115 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005116 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005117
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005118 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005119}
5120
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005121static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005122{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005123 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005124 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005125 struct drbd_work *w;
5126 sector_t sector;
5127 int size;
5128
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005129 mdev = vnr_to_mdev(tconn, pi->vnr);
5130 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005131 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005132
Philipp Reisnerb411b362009-09-25 16:07:19 -07005133 sector = be64_to_cpu(p->sector);
5134 size = be32_to_cpu(p->blksize);
5135
5136 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5137
5138 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005139 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005140 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005141 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005143 if (!get_ldev(mdev))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005144 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005145
Philipp Reisnerb411b362009-09-25 16:07:19 -07005146 drbd_rs_complete_io(mdev, sector);
5147 dec_rs_pending(mdev);
5148
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005149 --mdev->ov_left;
5150
5151 /* let's advance progress step marks only for every other megabyte */
5152 if ((mdev->ov_left & 0x200) == 0x200)
5153 drbd_advance_rs_marks(mdev, mdev->ov_left);
5154
5155 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005156 w = kmalloc(sizeof(*w), GFP_NOIO);
5157 if (w) {
5158 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01005159 w->mdev = mdev;
Lars Ellenbergd5b27b02011-11-14 15:42:37 +01005160 drbd_queue_work(&mdev->tconn->sender_work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005161 } else {
5162 dev_err(DEV, "kmalloc(w) failed.");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005163 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005164 drbd_resync_finished(mdev);
5165 }
5166 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005167 put_ldev(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005168 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005169}
5170
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005171static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005172{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005173 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005174}
5175
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005176static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005177{
Philipp Reisner082a3432011-03-15 16:05:42 +01005178 struct drbd_conf *mdev;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005179 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005180
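	/* Process completed peer requests on all volumes; repeat until no
	 * volume has further completions pending on its done_ee list. */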
5181 do {
5182 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5183 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005184
5185 rcu_read_lock();
5186 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5187 kref_get(&mdev->kref);
5188 rcu_read_unlock();
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005189 if (drbd_finish_peer_reqs(mdev)) {
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005190 kref_put(&mdev->kref, &drbd_minor_destroy);
5191 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005192 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005193 kref_put(&mdev->kref, &drbd_minor_destroy);
5194 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005195 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005196 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005197
5198 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005199 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner082a3432011-03-15 16:05:42 +01005200 not_empty = !list_empty(&mdev->done_ee);
5201 if (not_empty)
5202 break;
5203 }
5204 spin_unlock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005205 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005206 } while (not_empty);
5207
5208 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005209}
5210
5211struct asender_cmd {
5212 size_t pkt_size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005213 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005214};
5215
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005216static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005217 [P_PING] = { 0, got_Ping },
5218 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005219 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5220 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5221 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005222 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005223 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5224 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005225 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005226 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5227 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5228 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5229 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005230 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005231 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5232 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5233 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005234};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005235
5236int drbd_asender(struct drbd_thread *thi)
5237{
Philipp Reisner392c8802011-02-09 10:33:31 +01005238 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005239 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005240 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005241 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005242 void *buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005243 int received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005244 unsigned int header_size = drbd_header_size(tconn);
5245 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005246 bool ping_timeout_active = false;
5247 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005248 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005249 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005250
Philipp Reisner3990e042013-03-27 14:08:48 +01005251 rv = sched_setscheduler(current, SCHED_RR, &param);
5252 if (rv < 0)
5253 conn_err(tconn, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005254
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005255 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005256 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005257
5258 rcu_read_lock();
5259 nc = rcu_dereference(tconn->net_conf);
5260 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005261 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005262 ping_int = nc->ping_int;
5263 rcu_read_unlock();
5264
Philipp Reisner32862ec2011-02-08 16:41:01 +01005265 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Andreas Gruenbachera17647a2011-04-01 12:49:42 +02005266 if (drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005267 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005268 goto reconnect;
5269 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02005270 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
5271 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005272 }
5273
Philipp Reisner32862ec2011-02-08 16:41:01 +01005274 /* TODO: conditionally cork; it may hurt latency if we cork without
5275 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005276 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005277 drbd_tcp_cork(tconn->meta.socket);
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005278 if (tconn_finish_peer_reqs(tconn)) {
5279 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005280 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005281 }
5282 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005283 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005284 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005285
5286 /* short circuit, recv_msg would return EINTR anyways. */
5287 if (signal_pending(current))
5288 continue;
5289
Philipp Reisner32862ec2011-02-08 16:41:01 +01005290 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
5291 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005292
5293 flush_signals(current);
5294
5295 /* Note:
5296 * -EINTR (on meta) we got a signal
5297 * -EAGAIN (on meta) rcvtimeo expired
5298 * -ECONNRESET other side closed the connection
5299 * -ERESTARTSYS (on data) we got a signal
5300 * rv < 0 other than above: unexpected error!
5301 * rv == expected: full header or command
5302 * rv < expected: "woken" by signal during receive
5303 * rv == 0 : "connection shut down by peer"
5304 */
5305 if (likely(rv > 0)) {
5306 received += rv;
5307 buf += rv;
5308 } else if (rv == 0) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005309 if (test_bit(DISCONNECT_SENT, &tconn->flags)) {
5310 long t;
5311 rcu_read_lock();
5312 t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10;
5313 rcu_read_unlock();
5314
5315 t = wait_event_timeout(tconn->ping_wait,
5316 tconn->cstate < C_WF_REPORT_PARAMS,
5317 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005318 if (t)
5319 break;
5320 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005321 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005322 goto reconnect;
5323 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005324 /* If the data socket received something meanwhile,
5325 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01005326 if (time_after(tconn->last_received,
5327 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005328 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005329 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005330 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005331 goto reconnect;
5332 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005333 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005334 continue;
5335 } else if (rv == -EINTR) {
5336 continue;
5337 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005338 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005339 goto reconnect;
5340 }
5341
5342 if (received == expect && cmd == NULL) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005343 if (decode_header(tconn, tconn->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005344 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005345 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005346 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005347 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
5348 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005349 goto disconnect;
5350 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005351 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005352 if (pi.size != expect - header_size) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005353 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005354 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005355 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005356 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005357 }
5358 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005359 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005360
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005361 err = cmd->fn(tconn, &pi);
5362 if (err) {
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005363 conn_err(tconn, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005364 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005365 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005366
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005367 tconn->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005368
Philipp Reisner44ed1672011-04-19 17:10:19 +02005369 if (cmd == &asender_tbl[P_PING_ACK]) {
5370 /* restore idle timeout */
5371 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
5372 ping_timeout_active = false;
5373 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005374
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005375 buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005376 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005377 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005378 cmd = NULL;
5379 }
5380 }
5381
5382 if (0) {
5383reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005384 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisner19fffd72012-08-28 16:48:03 +02005385 conn_md_sync(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005386 }
5387 if (0) {
5388disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005389 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005390 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005391 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005392
Philipp Reisner32862ec2011-02-08 16:41:01 +01005393 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005394
5395 return 0;
5396}