/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

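/* put_page() every page of the chain; returns the number of pages freed */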
static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

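/* Try to satisfy the request from the preallocated drbd_pp_pool first; only
 * if the pool cannot provide @number pages, fall back to alloc_page(GFP_TRY).
 * On partial failure, give the pages allocated so far back to the pool and
 * return NULL; drbd_alloc_pages() will simply retry "soon". */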
static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first unfinished one, we
	   can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
			      bool retry)
{
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&mdev->pp_in_use) < mxb)
		page = __drbd_alloc_pages(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

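/* Allocate a peer request (the receive-side counterpart of a drbd_request)
 * together with a page chain large enough to hold @data_size bytes.
 * Returns NULL on allocation failure or injected fault. */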
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(mdev, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				    struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	};

	set_fs(oldfs);

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}

static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv(tconn, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(tconn, buf, size);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

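/* Create, bind and connect the outgoing TCP socket for this connection.
 * The relevant net_conf values are snapshotted under rcu first; the socket is
 * bound to the configured source address (port 0, so the kernel picks one).
 * "Expected" failures like timeouts or an unreachable peer do not force the
 * connection state to C_DISCONNECTING. */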
static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &tconn->my_addr, my_addr_len);

	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

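/* Context for the passive (listening) side of connection establishment.
 * incomming_connection() is hooked in as sk_state_change callback and
 * completes @door_bell once an incoming connection reaches TCP_ESTABLISHED,
 * so drbd_wait_for_connect() can sleep on the completion instead of polling
 * accept(). */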
struct accept_wait_data {
	struct drbd_tconn *tconn;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void incomming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	struct drbd_tconn *tconn = ad->tconn;

	if (sk->sk_state != TCP_ESTABLISHED)
		conn_warn(tconn, "unexpected tcp state change. sk_state = %d\n", sk->sk_state);

	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);

	sk->sk_state_change(sk);
	complete(&ad->door_bell);
}

static int prepare_listen_socket(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &tconn->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = 1; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = incomming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

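/* Sleep on ad->door_bell (connect-int, with some random jitter added), then
 * accept the pending connection on the prepared listen socket.  Returns the
 * established socket, or NULL on timeout or error. */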
static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "accept failed, err = %d\n", err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return s_estab;
}

static int decode_header(struct drbd_tconn *, void *, struct packet_info *);

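/* The very first packet on each of the two sockets only announces which role
 * the socket will play: P_INITIAL_DATA for the data socket, P_INITIAL_META
 * for the meta socket.  send_first_packet()/receive_first_packet() implement
 * that exchange; conn_connect() uses the result to sort out crossed
 * connection attempts. */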
static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(tconn);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(tconn, tconn->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_conf *mdev)
{
	int err;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_current_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_tconn *tconn)
{
	struct drbd_socket sock, msock;
	struct drbd_conf *mdev;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.tconn = tconn,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = tconn->data.sbuf;
	sock.rbuf = tconn->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = tconn->meta.sbuf;
	msock.rbuf = tconn->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	tconn->agreed_pro_version = 80;

	if (prepare_listen_socket(tconn, &ad))
		return 0;

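	/* Both peers connect actively and accept passively at the same time.
	 * The loop below runs until we hold both a data socket and a meta
	 * socket that still look healthy.  If the initial packets cross, the
	 * older socket is dropped in favour of the new one, and a coin flip
	 * (random32() & 1) decides whether to wait for yet another incoming
	 * connection before re-checking. */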
Philipp Reisnerb411b362009-09-25 16:07:19 -0700910 do {
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200911 struct socket *s;
912
Philipp Reisner92f14952012-08-01 11:41:01 +0200913 s = drbd_try_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700914 if (s) {
Philipp Reisner7da35862011-12-19 22:42:56 +0100915 if (!sock.socket) {
916 sock.socket = s;
917 send_first_packet(tconn, &sock, P_INITIAL_DATA);
918 } else if (!msock.socket) {
Lars Ellenberg427c0432012-08-01 12:43:01 +0200919 clear_bit(RESOLVE_CONFLICTS, &tconn->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100920 msock.socket = s;
921 send_first_packet(tconn, &msock, P_INITIAL_META);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700922 } else {
Philipp Reisner81fa2e62011-05-04 15:10:30 +0200923 conn_err(tconn, "Logic error in conn_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700924 goto out_release_sockets;
925 }
926 }
927
Philipp Reisner7da35862011-12-19 22:42:56 +0100928 if (sock.socket && msock.socket) {
929 rcu_read_lock();
930 nc = rcu_dereference(tconn->net_conf);
931 timeout = nc->ping_timeo * HZ / 10;
932 rcu_read_unlock();
933 schedule_timeout_interruptible(timeout);
934 ok = drbd_socket_okay(&sock.socket);
935 ok = drbd_socket_okay(&msock.socket) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936 if (ok)
937 break;
938 }
939
940retry:
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200941 s = drbd_wait_for_connect(tconn, &ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942 if (s) {
Philipp Reisner92f14952012-08-01 11:41:01 +0200943 int fp = receive_first_packet(tconn, s);
Philipp Reisner7da35862011-12-19 22:42:56 +0100944 drbd_socket_okay(&sock.socket);
945 drbd_socket_okay(&msock.socket);
Philipp Reisner92f14952012-08-01 11:41:01 +0200946 switch (fp) {
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200947 case P_INITIAL_DATA:
Philipp Reisner7da35862011-12-19 22:42:56 +0100948 if (sock.socket) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100949 conn_warn(tconn, "initial packet S crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100950 sock_release(sock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200951 sock.socket = s;
952 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700953 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100954 sock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700955 break;
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200956 case P_INITIAL_META:
Lars Ellenberg427c0432012-08-01 12:43:01 +0200957 set_bit(RESOLVE_CONFLICTS, &tconn->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100958 if (msock.socket) {
Philipp Reisner907599e2011-02-08 11:25:37 +0100959 conn_warn(tconn, "initial packet M crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100960 sock_release(msock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200961 msock.socket = s;
962 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700963 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100964 msock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700965 break;
966 default:
Philipp Reisner907599e2011-02-08 11:25:37 +0100967 conn_warn(tconn, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700968 sock_release(s);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200969randomize:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700970 if (random32() & 1)
971 goto retry;
972 }
973 }
974
Philipp Reisnerbbeb6412011-02-10 13:45:46 +0100975 if (tconn->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700976 goto out_release_sockets;
977 if (signal_pending(current)) {
978 flush_signals(current);
979 smp_rmb();
Philipp Reisner907599e2011-02-08 11:25:37 +0100980 if (get_t_state(&tconn->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981 goto out_release_sockets;
982 }
983
Philipp Reisnerb666dbf2012-07-26 14:12:59 +0200984 ok = drbd_socket_okay(&sock.socket);
985 ok = drbd_socket_okay(&msock.socket) && ok;
986 } while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200988 if (ad.s_listen)
989 sock_release(ad.s_listen);
990
Philipp Reisner7da35862011-12-19 22:42:56 +0100991 sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
992 msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200993
Philipp Reisner7da35862011-12-19 22:42:56 +0100994 sock.socket->sk->sk_allocation = GFP_NOIO;
995 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700996
Philipp Reisner7da35862011-12-19 22:42:56 +0100997 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
998 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700999
Philipp Reisnerb411b362009-09-25 16:07:19 -07001000 /* NOT YET ...
Philipp Reisner7da35862011-12-19 22:42:56 +01001001 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
1002 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Andreas Gruenbacher60381782011-03-28 17:05:50 +02001003 * first set it to the P_CONNECTION_FEATURES timeout,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001004 * which we set to 4x the configured ping_timeout. */
Philipp Reisner44ed1672011-04-19 17:10:19 +02001005 rcu_read_lock();
1006 nc = rcu_dereference(tconn->net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001007
Philipp Reisner7da35862011-12-19 22:42:56 +01001008 sock.socket->sk->sk_sndtimeo =
1009 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001010
Philipp Reisner7da35862011-12-19 22:42:56 +01001011 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001012 timeout = nc->timeout * HZ / 10;
Philipp Reisner08b165b2011-09-05 16:22:33 +02001013 discard_my_data = nc->discard_my_data;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001014 rcu_read_unlock();
1015
Philipp Reisner7da35862011-12-19 22:42:56 +01001016 msock.socket->sk->sk_sndtimeo = timeout;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001017
1018 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001019 * we use TCP_CORK where appropriate, though */
Philipp Reisner7da35862011-12-19 22:42:56 +01001020 drbd_tcp_nodelay(sock.socket);
1021 drbd_tcp_nodelay(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001022
Philipp Reisner7da35862011-12-19 22:42:56 +01001023 tconn->data.socket = sock.socket;
1024 tconn->meta.socket = msock.socket;
Philipp Reisner907599e2011-02-08 11:25:37 +01001025 tconn->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001026
Andreas Gruenbacher60381782011-03-28 17:05:50 +02001027 h = drbd_do_features(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001028 if (h <= 0)
1029 return h;
1030
Philipp Reisner907599e2011-02-08 11:25:37 +01001031 if (tconn->cram_hmac_tfm) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001032 /* drbd_request_state(mdev, NS(conn, WFAuth)); */
Philipp Reisner907599e2011-02-08 11:25:37 +01001033 switch (drbd_do_auth(tconn)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +01001034 case -1:
Philipp Reisner907599e2011-02-08 11:25:37 +01001035 conn_err(tconn, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001036 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +01001037 case 0:
Philipp Reisner907599e2011-02-08 11:25:37 +01001038 conn_err(tconn, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01001039 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001040 }
1041 }
1042
Philipp Reisner7da35862011-12-19 22:42:56 +01001043 tconn->data.socket->sk->sk_sndtimeo = timeout;
1044 tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001045
Andreas Gruenbacher387eb302011-03-16 01:05:37 +01001046 if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +02001047 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001048
Philipp Reisnera1096a62012-04-06 12:07:34 +02001049 set_bit(STATE_SENT, &tconn->flags);
1050
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001051 rcu_read_lock();
1052 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1053 kref_get(&mdev->kref);
1054 rcu_read_unlock();
Philipp Reisner08b165b2011-09-05 16:22:33 +02001055
1056 if (discard_my_data)
1057 set_bit(DISCARD_MY_DATA, &mdev->flags);
1058 else
1059 clear_bit(DISCARD_MY_DATA, &mdev->flags);
1060
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001061 drbd_connected(mdev);
1062 kref_put(&mdev->kref, &drbd_minor_destroy);
1063 rcu_read_lock();
1064 }
1065 rcu_read_unlock();
1066
Philipp Reisnera1096a62012-04-06 12:07:34 +02001067 rv = conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1068 if (rv < SS_SUCCESS) {
1069 clear_bit(STATE_SENT, &tconn->flags);
Philipp Reisner823bd832012-11-08 15:04:36 +01001070 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001071 }
Philipp Reisner823bd832012-11-08 15:04:36 +01001072
1073 drbd_thread_start(&tconn->asender);
1074
Philipp Reisner08b165b2011-09-05 16:22:33 +02001075 mutex_lock(&tconn->conf_update);
1076 /* The discard_my_data flag is a single-shot modifier to the next
1077 * connection attempt, the handshake of which is now well underway.
1078 * No need for rcu style copying of the whole struct
1079 * just to clear a single value. */
1080 tconn->net_conf->discard_my_data = 0;
1081 mutex_unlock(&tconn->conf_update);
1082
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001083 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001084
1085out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001086 if (ad.s_listen)
1087 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001088 if (sock.socket)
1089 sock_release(sock.socket);
1090 if (msock.socket)
1091 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001092 return -1;
1093}
1094
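/* Parse a received packet header into struct packet_info.  Three on-the-wire
 * header formats exist, distinguished by header size and magic value:
 * p_header100 (carries a volume number), p_header95 (16 bit command,
 * 32 bit length) and the original p_header80. */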
static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(tconn);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			conn_err(tconn, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 tconn->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	void *buffer = tconn->data.rbuf;
	int err;

	err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
	if (err)
		return err;

	err = decode_header(tconn, buffer, pi);
	tconn->last_received = jiffies;

	return err;
}

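/* If the current write ordering requires it, issue a cache flush to the
 * backing device of every volume of this connection.  A failing flush
 * downgrades the write ordering to WO_drain_io. */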
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001146static void drbd_flush(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147{
1148 int rv;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001149 struct drbd_conf *mdev;
1150 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001151
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001152 if (tconn->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001153 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001154 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001155 if (!get_ldev(mdev))
1156 continue;
1157 kref_get(&mdev->kref);
1158 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001159
Lars Ellenberg615e0872011-11-17 14:32:12 +01001160 rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
1161 GFP_NOIO, NULL);
1162 if (rv) {
1163 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1164 /* would rather check on EOPNOTSUPP, but that is not reliable.
1165 * don't try again for ANY return value != 0
1166 * if (rv == -EOPNOTSUPP) */
1167 drbd_bump_write_ordering(tconn, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001168 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001169 put_ldev(mdev);
1170 kref_put(&mdev->kref, &drbd_minor_destroy);
1171
1172 rcu_read_lock();
1173 if (rv)
1174 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001175 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001176 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001177 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001178}
1179
1180/**
1181 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1182 * @mdev: DRBD device.
1183 * @epoch: Epoch object.
1184 * @ev: Epoch event.
1185 */
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001186static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 struct drbd_epoch *epoch,
1188 enum epoch_event ev)
1189{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001190 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001192 enum finish_epoch rv = FE_STILL_LIVE;
1193
Philipp Reisner12038a32011-11-09 19:18:00 +01001194 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 do {
1196 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197
1198 epoch_size = atomic_read(&epoch->epoch_size);
1199
1200 switch (ev & ~EV_CLEANUP) {
1201 case EV_PUT:
1202 atomic_dec(&epoch->active);
1203 break;
1204 case EV_GOT_BARRIER_NR:
1205 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206 break;
1207 case EV_BECAME_LAST:
1208			/* nothing to do */
1209 break;
1210 }
1211
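		/* an epoch can be finished once it has seen at least one write,
		 * has no more active requests, and we either received the
		 * corresponding barrier or are cleaning up */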
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212 if (epoch_size != 0 &&
1213 atomic_read(&epoch->active) == 0 &&
Philipp Reisner85d735132011-07-18 15:45:15 +02001214 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215 if (!(ev & EV_CLEANUP)) {
Philipp Reisner12038a32011-11-09 19:18:00 +01001216 spin_unlock(&tconn->epoch_lock);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001217 drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
Philipp Reisner12038a32011-11-09 19:18:00 +01001218 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001220#if 0
1221 /* FIXME: dec unacked on connection, once we have
1222 * something to count pending connection packets in. */
Philipp Reisner85d735132011-07-18 15:45:15 +02001223 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001224 dec_unacked(epoch->tconn);
1225#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001226
Philipp Reisner12038a32011-11-09 19:18:00 +01001227 if (tconn->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1229 list_del(&epoch->list);
1230 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Philipp Reisner12038a32011-11-09 19:18:00 +01001231 tconn->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 kfree(epoch);
1233
1234 if (rv == FE_STILL_LIVE)
1235 rv = FE_DESTROYED;
1236 } else {
1237 epoch->flags = 0;
1238 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001239 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240 if (rv == FE_STILL_LIVE)
1241 rv = FE_RECYCLED;
1242 }
1243 }
1244
1245 if (!next_epoch)
1246 break;
1247
1248 epoch = next_epoch;
1249 } while (1);
1250
Philipp Reisner12038a32011-11-09 19:18:00 +01001251 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001252
Philipp Reisnerb411b362009-09-25 16:07:19 -07001253 return rv;
1254}
1255
1256/**
1257 * drbd_bump_write_ordering() - Fall back to another write ordering method
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001258 * @tconn: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001259 * @wo: Write ordering method to try.
1260 */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001261void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001263 struct disk_conf *dc;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001264 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001265 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001266 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267 static char *write_ordering_str[] = {
1268 [WO_none] = "none",
1269 [WO_drain_io] = "drain",
1270 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271 };
1272
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001273 pwo = tconn->write_ordering;
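	/* only ever fall back to a weaker ordering method, never upgrade */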
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001275 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001276 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner27eb13e2012-03-30 14:12:15 +02001277 if (!get_ldev_if_state(mdev, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001278 continue;
1279 dc = rcu_dereference(mdev->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001280
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001281 if (wo == WO_bdev_flush && !dc->disk_flushes)
1282 wo = WO_drain_io;
1283 if (wo == WO_drain_io && !dc->disk_drain)
1284 wo = WO_none;
1285 put_ldev(mdev);
1286 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001287 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001288 tconn->write_ordering = wo;
1289 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1290 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001291}
1292
1293/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001294 * drbd_submit_peer_request() - submit the peer request's pages to the local backing device
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001295 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001296 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001297 * @rw: flag field, see bio->bi_rw
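 * @fault_type:	fault injection class, passed through to drbd_generic_make_request()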
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001298 *
1299 * May spread the pages to multiple bios,
1300 * depending on bio_add_page restrictions.
1301 *
1302 * Returns 0 if all bios have been submitted,
1303 * -ENOMEM if we could not allocate enough bios,
1304 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1305 * single page to an empty bio (which should never happen and likely indicates
1306 * that the lower level IO stack is in some way broken). This has been observed
1307 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001308 */
1309/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001310int drbd_submit_peer_request(struct drbd_conf *mdev,
1311 struct drbd_peer_request *peer_req,
1312 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001313{
1314 struct bio *bios = NULL;
1315 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001316 struct page *page = peer_req->pages;
1317 sector_t sector = peer_req->i.sector;
1318 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001319 unsigned n_bios = 0;
1320 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001321 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001322
1323 /* In most cases, we will only need one bio. But in case the lower
1324 * level restrictions happen to be different at this offset on this
1325 * side than those of the sending peer, we may need to submit the
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001326 * request in more than one bio.
1327 *
1328 * Plain bio_alloc is good enough here, this is no DRBD internally
1329 * generated bio, but a bio allocated on behalf of the peer.
1330 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001331next_bio:
1332 bio = bio_alloc(GFP_NOIO, nr_pages);
1333 if (!bio) {
1334 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1335 goto fail;
1336 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001337 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001338 bio->bi_sector = sector;
1339 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001340 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001341 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001342 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001343
1344 bio->bi_next = bios;
1345 bios = bio;
1346 ++n_bios;
1347
1348 page_chain_for_each(page) {
1349 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1350 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001351 /* A single page must always be possible!
1352 * But in case it fails anyways,
1353 * we deal with it, and complain (below). */
1354 if (bio->bi_vcnt == 0) {
1355 dev_err(DEV,
1356 "bio_add_page failed for len=%u, "
1357 "bi_vcnt=0 (bi_sector=%llu)\n",
1358 len, (unsigned long long)bio->bi_sector);
1359 err = -ENOSPC;
1360 goto fail;
1361 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001362 goto next_bio;
1363 }
1364 ds -= len;
1365 sector += len >> 9;
1366 --nr_pages;
1367 }
1368 D_ASSERT(page == NULL);
1369 D_ASSERT(ds == 0);
1370
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001371 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001372 do {
1373 bio = bios;
1374 bios = bios->bi_next;
1375 bio->bi_next = NULL;
1376
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001377 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001378 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001379 return 0;
1380
1381fail:
1382 while (bios) {
1383 bio = bios;
1384 bios = bios->bi_next;
1385 bio_put(bio);
1386 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001387 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001388}
1389
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001390static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001391 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001392{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001393 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001394
1395 drbd_remove_interval(&mdev->write_requests, i);
1396 drbd_clear_interval(i);
1397
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001398 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001399 if (i->waiting)
1400 wake_up(&mdev->misc_wait);
1401}
1402
Philipp Reisner77fede52011-11-10 21:19:11 +01001403void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
1404{
1405 struct drbd_conf *mdev;
1406 int vnr;
1407
1408 rcu_read_lock();
1409 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
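		/* pin the device and drop the rcu lock;
		 * waiting for the list to drain may block */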
1410 kref_get(&mdev->kref);
1411 rcu_read_unlock();
1412 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1413 kref_put(&mdev->kref, &drbd_minor_destroy);
1414 rcu_read_lock();
1415 }
1416 rcu_read_unlock();
1417}
1418
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001419static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001420{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001421 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001422 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001423 struct drbd_epoch *epoch;
1424
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001425 /* FIXME these are unacked on connection,
1426 * not a specific (peer)device.
1427 */
Philipp Reisner12038a32011-11-09 19:18:00 +01001428 tconn->current_epoch->barrier_nr = p->barrier;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001429 tconn->current_epoch->tconn = tconn;
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001430 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001431
1432 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1433 * the activity log, which means it would not be resynced in case the
1434 * R_PRIMARY crashes now.
1435 * Therefore we must send the barrier_ack after the barrier request was
1436 * completed. */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001437 switch (tconn->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001438 case WO_none:
1439 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001440 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001441
1442 /* receiver context, in the writeout path of the other node.
1443 * avoid potential distributed deadlock */
1444 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1445 if (epoch)
1446 break;
1447 else
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001448 conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001449 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001450
1451 case WO_bdev_flush:
1452 case WO_drain_io:
Philipp Reisner77fede52011-11-10 21:19:11 +01001453 conn_wait_active_ee_empty(tconn);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001454 drbd_flush(tconn);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001455
Philipp Reisner12038a32011-11-09 19:18:00 +01001456 if (atomic_read(&tconn->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001457 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1458 if (epoch)
1459 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460 }
1461
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001462 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001463 default:
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001464 conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001465 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 }
1467
1468 epoch->flags = 0;
1469 atomic_set(&epoch->epoch_size, 0);
1470 atomic_set(&epoch->active, 0);
1471
Philipp Reisner12038a32011-11-09 19:18:00 +01001472 spin_lock(&tconn->epoch_lock);
1473 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1474 list_add(&epoch->list, &tconn->current_epoch->list);
1475 tconn->current_epoch = epoch;
1476 tconn->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001477 } else {
1478 /* The current_epoch got recycled while we allocated this one... */
1479 kfree(epoch);
1480 }
Philipp Reisner12038a32011-11-09 19:18:00 +01001481 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001482
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001483 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484}
1485
1486/* used from receive_RSDataReply (recv_resync_read)
1487 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001488static struct drbd_peer_request *
1489read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1490 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491{
Lars Ellenberg66660322010-04-06 12:15:04 +02001492 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001495 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001496 void *dig_in = mdev->tconn->int_dig_in;
1497 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001498 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001499
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001500 dgs = 0;
1501 if (mdev->tconn->peer_integrity_tfm) {
1502 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001503 /*
1504 * FIXME: Receive the incoming digest into the receive buffer
1505 * here, together with its struct p_data?
1506 */
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001507 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1508 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001509 return NULL;
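		/* data_size on the wire still includes the digest we just read;
		 * the rest is the actual payload */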
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001510 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511 }
1512
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001513 if (!expect(IS_ALIGNED(data_size, 512)))
1514 return NULL;
1515 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1516 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001517
Lars Ellenberg66660322010-04-06 12:15:04 +02001518	/* even though we trust our peer,
1519 * we sometimes have to double check. */
1520 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001521 dev_err(DEV, "request from peer beyond end of local disk: "
1522 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001523 (unsigned long long)capacity,
1524 (unsigned long long)sector, data_size);
1525 return NULL;
1526 }
1527
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1529 * "criss-cross" setup, that might cause write-out on some other DRBD,
1530 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02001531 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001532 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001533 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001534
Lars Ellenberg81a35372012-07-30 09:00:54 +02001535 if (!data_size)
1536 return peer_req;
1537
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001539 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001540 page_chain_for_each(page) {
1541 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001542 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001543 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001544 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001545 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1546 data[0] = data[0] ^ (unsigned long)-1;
1547 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001549 if (err) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001550 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001551 return NULL;
1552 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001553 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001554 }
1555
1556 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001557 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001559 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1560 (unsigned long long)sector, data_size);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001561 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562 return NULL;
1563 }
1564 }
1565 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001566 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567}
1568
1569/* drbd_drain_block() just takes a data block
1570 * out of the socket input buffer, and discards it.
1571 */
1572static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1573{
1574 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001575 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576 void *data;
1577
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001578 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001579 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001580
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001581 page = drbd_alloc_pages(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001582
1583 data = kmap(page);
1584 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001585 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1586
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001587 err = drbd_recv_all_warn(mdev->tconn, data, len);
1588 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001589 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001590 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 }
1592 kunmap(page);
Andreas Gruenbacher5cc287e2011-04-07 21:02:59 +02001593 drbd_free_pages(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001594 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595}
1596
1597static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1598 sector_t sector, int data_size)
1599{
1600 struct bio_vec *bvec;
1601 struct bio *bio;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001602 int dgs, err, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001603 void *dig_in = mdev->tconn->int_dig_in;
1604 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001606 dgs = 0;
1607 if (mdev->tconn->peer_integrity_tfm) {
1608 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001609 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1610 if (err)
1611 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001612 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613 }
1614
Philipp Reisnerb411b362009-09-25 16:07:19 -07001615 /* optimistically update recv_cnt. if receiving fails below,
1616 * we disconnect anyways, and counters will be reset. */
1617 mdev->recv_cnt += data_size>>9;
1618
1619 bio = req->master_bio;
1620 D_ASSERT(sector == bio->bi_sector);
1621
1622 bio_for_each_segment(bvec, bio, i) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001623 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001624 expect = min_t(int, data_size, bvec->bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001625 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001626 kunmap(bvec->bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001627 if (err)
1628 return err;
1629 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 }
1631
1632 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001633 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001634 if (memcmp(dig_in, dig_vv, dgs)) {
1635 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001636 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001637 }
1638 }
1639
1640 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001641 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001642}
1643
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001644/*
1645 * e_end_resync_block() is called in asender context via
1646 * drbd_finish_peer_reqs().
1647 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001648static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001650 struct drbd_peer_request *peer_req =
1651 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001652 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001653 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001654 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001656 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001658 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1659 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001660 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661 } else {
1662 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001663 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001665 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666 }
1667 dec_unacked(mdev);
1668
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001669 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001670}
1671
1672static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1673{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001674 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001676 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1677 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001678 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679
1680 dec_rs_pending(mdev);
1681
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682 inc_unacked(mdev);
1683 /* corresponding dec_unacked() in e_end_resync_block()
1684 * respective _drbd_clear_done_ee */
1685
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001686 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001687
Philipp Reisner87eeee42011-01-19 14:16:30 +01001688 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001689 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001690 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001692 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001693 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001694 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001696 /* don't care for the reason here */
1697 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001698 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001699 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001700 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001701
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001702 drbd_free_peer_req(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001703fail:
1704 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001705 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706}
1707
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001708static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001709find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1710 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001711{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001712 struct drbd_request *req;
1713
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001714 /* Request object according to our peer */
1715 req = (struct drbd_request *)(unsigned long)id;
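	/* only trust the pointer if it really is one of our pending
	 * requests at that sector */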
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001716 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001717 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001718 if (!missing_ok) {
Andreas Gruenbacher5af172e2011-07-15 09:43:23 +02001719 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001720 (unsigned long)id, (unsigned long long)sector);
1721 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001722 return NULL;
1723}
1724
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001725static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001727 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001728 struct drbd_request *req;
1729 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001730 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001731 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001732
1733 mdev = vnr_to_mdev(tconn, pi->vnr);
1734 if (!mdev)
1735 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736
1737 sector = be64_to_cpu(p->sector);
1738
Philipp Reisner87eeee42011-01-19 14:16:30 +01001739 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001740 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001741 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001742 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001743 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744
Bart Van Assche24c48302011-05-21 18:32:29 +02001745 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746 * special casing it there for the various failure cases.
1747 * still no race with drbd_fail_pending_reads */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001748 err = recv_dless_read(mdev, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001749 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001750 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001751 /* else: nothing. handled from drbd_disconnect...
1752 * I don't think we may complete this just yet
1753 * in case we are "on-disconnect: freeze" */
1754
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001755 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756}
1757
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001758static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001759{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001760 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001761 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001762 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001763 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001764
1765 mdev = vnr_to_mdev(tconn, pi->vnr);
1766 if (!mdev)
1767 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768
1769 sector = be64_to_cpu(p->sector);
1770 D_ASSERT(p->block_id == ID_SYNCER);
1771
1772 if (get_ldev(mdev)) {
1773 /* data is submitted to disk within recv_resync_read.
1774 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001775 * or in drbd_peer_request_endio. */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001776 err = recv_resync_read(mdev, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001777 } else {
1778 if (__ratelimit(&drbd_ratelimit_state))
1779 dev_err(DEV, "Can not write resync data to local disk.\n");
1780
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001781 err = drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001783 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 }
1785
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001786 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001787
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001788 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789}
1790
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001791static void restart_conflicting_writes(struct drbd_conf *mdev,
1792 sector_t sector, int size)
1793{
1794 struct drbd_interval *i;
1795 struct drbd_request *req;
1796
1797 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1798 if (!i->local)
1799 continue;
1800 req = container_of(i, struct drbd_request, i);
1801 if (req->rq_state & RQ_LOCAL_PENDING ||
1802 !(req->rq_state & RQ_POSTPONED))
1803 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001804 /* as it is RQ_POSTPONED, this will cause it to
1805 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001806 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001807 }
1808}
1809
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001810/*
1811 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001812 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001813static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001814{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001815 struct drbd_peer_request *peer_req =
1816 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001817 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001818 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001819 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820
Philipp Reisner303d1442011-04-13 16:24:47 -07001821 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001822 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001823 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1824 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001825 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001827 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001828 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001829 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001831 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001832 /* we expect it to be marked out of sync anyways...
1833 * maybe assert this? */
1834 }
1835 dec_unacked(mdev);
1836 }
1837 /* we delete from the conflict detection hash _after_ we sent out the
1838 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001839 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001840 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001841 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1842 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001843 if (peer_req->flags & EE_RESTART_REQUESTS)
1844 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001845 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001846 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001847 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001848
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001849 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001851 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001852}
1853
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001854static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001856 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001857 struct drbd_peer_request *peer_req =
1858 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001859 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001860
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001861 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001862 dec_unacked(mdev);
1863
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001864 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865}
1866
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001867static int e_send_superseded(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001868{
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001869 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001870}
1871
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001872static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001873{
1874 struct drbd_tconn *tconn = w->mdev->tconn;
1875
1876 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001877 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001878}
1879
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001880static bool seq_greater(u32 a, u32 b)
1881{
1882 /*
1883 * We assume 32-bit wrap-around here.
1884 * For 24-bit wrap-around, we would have to shift:
1885 * a <<= 8; b <<= 8;
1886 */
1887 return (s32)a - (s32)b > 0;
1888}
1889
1890static u32 seq_max(u32 a, u32 b)
1891{
1892 return seq_greater(a, b) ? a : b;
1893}
1894
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001895static bool need_peer_seq(struct drbd_conf *mdev)
1896{
1897 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisner302bdea2011-04-21 11:36:49 +02001898 int tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001899
1900 /*
1901 * We only need to keep track of the last packet_seq number of our peer
Lars Ellenberg427c0432012-08-01 12:43:01 +02001902 * if we are in dual-primary mode and we have the resolve-conflicts flag set; see
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001903 * handle_write_conflicts().
1904 */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001905
1906 rcu_read_lock();
1907 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1908 rcu_read_unlock();
1909
Lars Ellenberg427c0432012-08-01 12:43:01 +02001910 return tp && test_bit(RESOLVE_CONFLICTS, &tconn->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001911}
1912
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001913static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001914{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001915 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001916
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001917 if (need_peer_seq(mdev)) {
1918 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001919 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1920 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001921 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001922 /* wake up only if we actually changed mdev->peer_seq */
1923 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001924 wake_up(&mdev->seq_wait);
1925 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001926}
1927
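/* do the two ranges, given as start sector plus length in bytes, intersect? */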
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001928static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1929{
1930 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1931}
1932
1933/* maybe change sync_ee into interval trees as well? */
Philipp Reisner3ea35df2012-04-06 12:13:18 +02001934static bool overlapping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001935{
1936 struct drbd_peer_request *rs_req;
1937	bool rv = false;
1938
1939 spin_lock_irq(&mdev->tconn->req_lock);
1940 list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1941 if (overlaps(peer_req->i.sector, peer_req->i.size,
1942 rs_req->i.sector, rs_req->i.size)) {
1943			rv = true;
1944 break;
1945 }
1946 }
1947 spin_unlock_irq(&mdev->tconn->req_lock);
1948
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001949 return rv;
1950}
1951
Philipp Reisnerb411b362009-09-25 16:07:19 -07001952/* Called from receive_Data.
1953 * Synchronize packets on sock with packets on msock.
1954 *
1955 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1956 * packet traveling on msock, they are still processed in the order they have
1957 * been sent.
1958 *
1959 * Note: we don't care for Ack packets overtaking P_DATA packets.
1960 *
1961 * In case peer_seq is larger than mdev->peer_seq, there are
1962 * outstanding packets on the msock. We wait for them to arrive.
1963 * In case we are the logically next packet, we update mdev->peer_seq
1964 * ourselves. Correctly handles 32bit wrap around.
1965 *
1966 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1967 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1968 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1969 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1970 *
1971 * returns 0 if we may process the packet,
1972 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001973static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974{
1975 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001976 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001977 int ret;
1978
1979 if (!need_peer_seq(mdev))
1980 return 0;
1981
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982 spin_lock(&mdev->peer_seq_lock);
1983 for (;;) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001984 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1985 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1986 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001987 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001988 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001989 if (signal_pending(current)) {
1990 ret = -ERESTARTSYS;
1991 break;
1992 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001993 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001994 spin_unlock(&mdev->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001995 rcu_read_lock();
1996 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1997 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001998 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001999 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002000 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002002 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002003 break;
2004 }
2005 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002006 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002007 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002008 return ret;
2009}
2010
Lars Ellenberg688593c2010-11-17 22:25:03 +01002011/* see also bio_flags_to_wire()
2012 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2013 * flags and back. We may replicate to other kernel versions. */
2014static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002015{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002016 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2017 (dpf & DP_FUA ? REQ_FUA : 0) |
2018 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2019 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002020}
2021
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002022static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
2023 unsigned int size)
2024{
2025 struct drbd_interval *i;
2026
2027 repeat:
2028 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2029 struct drbd_request *req;
2030 struct bio_and_error m;
2031
2032 if (!i->local)
2033 continue;
2034 req = container_of(i, struct drbd_request, i);
2035 if (!(req->rq_state & RQ_POSTPONED))
2036 continue;
2037 req->rq_state &= ~RQ_POSTPONED;
2038 __req_mod(req, NEG_ACKED, &m);
2039 spin_unlock_irq(&mdev->tconn->req_lock);
2040 if (m.bio)
2041 complete_master_bio(mdev, &m);
2042 spin_lock_irq(&mdev->tconn->req_lock);
2043 goto repeat;
2044 }
2045}
2046
2047static int handle_write_conflicts(struct drbd_conf *mdev,
2048 struct drbd_peer_request *peer_req)
2049{
2050 struct drbd_tconn *tconn = mdev->tconn;
Lars Ellenberg427c0432012-08-01 12:43:01 +02002051 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &tconn->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002052 sector_t sector = peer_req->i.sector;
2053 const unsigned int size = peer_req->i.size;
2054 struct drbd_interval *i;
2055 bool equal;
2056 int err;
2057
2058 /*
2059 * Inserting the peer request into the write_requests tree will prevent
2060 * new conflicting local requests from being added.
2061 */
2062 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2063
2064 repeat:
2065 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2066 if (i == &peer_req->i)
2067 continue;
2068
2069 if (!i->local) {
2070 /*
2071 * Our peer has sent a conflicting remote request; this
2072 * should not happen in a two-node setup. Wait for the
2073 * earlier peer request to complete.
2074 */
2075 err = drbd_wait_misc(mdev, i);
2076 if (err)
2077 goto out;
2078 goto repeat;
2079 }
2080
2081 equal = i->sector == sector && i->size == size;
2082 if (resolve_conflicts) {
2083 /*
2084 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002085 * overlapping request, it can be considered overwritten
2086 * and thus superseded; otherwise, it will be retried
2087 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002088 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002089 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002090 (i->size >> 9) >= sector + (size >> 9);
2091
2092 if (!equal)
2093 dev_alert(DEV, "Concurrent writes detected: "
2094 "local=%llus +%u, remote=%llus +%u, "
2095 "assuming %s came first\n",
2096 (unsigned long long)i->sector, i->size,
2097 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002098 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002099
2100 inc_unacked(mdev);
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002101 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002102 e_send_retry_write;
2103 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2104 wake_asender(mdev->tconn);
2105
2106 err = -ENOENT;
2107 goto out;
2108 } else {
2109 struct drbd_request *req =
2110 container_of(i, struct drbd_request, i);
2111
2112 if (!equal)
2113 dev_alert(DEV, "Concurrent writes detected: "
2114 "local=%llus +%u, remote=%llus +%u\n",
2115 (unsigned long long)i->sector, i->size,
2116 (unsigned long long)sector, size);
2117
2118 if (req->rq_state & RQ_LOCAL_PENDING ||
2119 !(req->rq_state & RQ_POSTPONED)) {
2120 /*
2121			 * Wait for the node with the resolve-conflicts flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002122 * decide if this request has been superseded
2123 * or needs to be retried.
2124 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002125 * disappear from the write_requests tree.
2126 *
2127 * In addition, wait for the conflicting
2128 * request to finish locally before submitting
2129 * the conflicting peer request.
2130 */
2131 err = drbd_wait_misc(mdev, &req->i);
2132 if (err) {
2133 _conn_request_state(mdev->tconn,
2134 NS(conn, C_TIMEOUT),
2135 CS_HARD);
2136 fail_postponed_requests(mdev, sector, size);
2137 goto out;
2138 }
2139 goto repeat;
2140 }
2141 /*
2142 * Remember to restart the conflicting requests after
2143 * the new peer request has completed.
2144 */
2145 peer_req->flags |= EE_RESTART_REQUESTS;
2146 }
2147 }
2148 err = 0;
2149
2150 out:
2151 if (err)
2152 drbd_remove_epoch_entry_interval(mdev, peer_req);
2153 return err;
2154}
2155
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156/* mirrored write */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002157static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002158{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002159 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002161 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002162 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002163 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002164 int rw = WRITE;
2165 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002166 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002167
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002168 mdev = vnr_to_mdev(tconn, pi->vnr);
2169 if (!mdev)
2170 return -EIO;
2171
Philipp Reisnerb411b362009-09-25 16:07:19 -07002172 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002173 int err2;
2174
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002175 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002176 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisner12038a32011-11-09 19:18:00 +01002177 atomic_inc(&tconn->current_epoch->epoch_size);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002178 err2 = drbd_drain_block(mdev, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002179 if (!err)
2180 err = err2;
2181 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002182 }
2183
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002184 /*
2185 * Corresponding put_ldev done either below (on various errors), or in
2186 * drbd_peer_request_endio, if we successfully submit the data at the
2187 * end of this function.
2188 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189
2190 sector = be64_to_cpu(p->sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002191 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002192 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002194 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195 }
2196
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002197 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198
Lars Ellenberg688593c2010-11-17 22:25:03 +01002199 dp_flags = be32_to_cpu(p->dp_flags);
2200 rw |= wire_flags_to_bio(mdev, dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002201 if (peer_req->pages == NULL) {
2202 D_ASSERT(peer_req->i.size == 0);
2203 D_ASSERT(dp_flags & DP_FLUSH);
2204 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002205
2206 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002207 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002208
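	/* account this write in the currently open epoch */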
Philipp Reisner12038a32011-11-09 19:18:00 +01002209 spin_lock(&tconn->epoch_lock);
2210 peer_req->epoch = tconn->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002211 atomic_inc(&peer_req->epoch->epoch_size);
2212 atomic_inc(&peer_req->epoch->active);
Philipp Reisner12038a32011-11-09 19:18:00 +01002213 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002214
Philipp Reisner302bdea2011-04-21 11:36:49 +02002215 rcu_read_lock();
2216 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
2217 rcu_read_unlock();
2218 if (tp) {
2219 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002220 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2221 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002222 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002223 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002224 err = handle_write_conflicts(mdev, peer_req);
2225 if (err) {
2226 spin_unlock_irq(&mdev->tconn->req_lock);
2227 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002228 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002229 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002231 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002232 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002233 } else
2234 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002235 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002236 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002237
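	/* as sync target, do not submit this write while an overlapping
	 * resync write is still in flight */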
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002238 if (mdev->state.conn == C_SYNC_TARGET)
Philipp Reisner3ea35df2012-04-06 12:13:18 +02002239 wait_event(mdev->ee_wait, !overlapping_resync_write(mdev, peer_req));
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002240
Philipp Reisner303d1442011-04-13 16:24:47 -07002241 if (mdev->tconn->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002242 rcu_read_lock();
2243 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002244 case DRBD_PROT_C:
2245 dp_flags |= DP_SEND_WRITE_ACK;
2246 break;
2247 case DRBD_PROT_B:
2248 dp_flags |= DP_SEND_RECEIVE_ACK;
2249 break;
2250 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002251 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002252 }
2253
2254 if (dp_flags & DP_SEND_WRITE_ACK) {
2255 peer_req->flags |= EE_SEND_WRITE_ACK;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002256 inc_unacked(mdev);
2257 /* corresponding dec_unacked() in e_end_block()
2258 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002259 }
2260
2261 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002262 /* I really don't like it that the receiver thread
2263 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002264 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265 }
2266
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002267 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002268 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002269 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2270 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2271 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg181286a2011-03-31 15:18:56 +02002272 drbd_al_begin_io(mdev, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002273 }
2274
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002275 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2276 if (!err)
2277 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002278
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002279 /* don't care for the reason here */
2280 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002281 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002282 list_del(&peer_req->w.list);
2283 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002284 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002285 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Lars Ellenberg181286a2011-03-31 15:18:56 +02002286 drbd_al_complete_io(mdev, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002287
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288out_interrupted:
Philipp Reisner1e9dd292011-11-10 15:14:53 +01002289 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002290 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002291 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002292 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293}
2294
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002295/* We may throttle resync, if the lower device seems to be busy,
2296 * and current sync rate is above c_min_rate.
2297 *
 2298 * To decide whether or not the lower device is busy, we use a scheme similar
 2299 * to MD RAID's is_mddev_idle(): if the partition stats reveal a "significant"
 2300 * amount (more than 64 sectors) of activity that we cannot account for with our
 2301 * own resync activity, the device obviously is "busy".
2302 *
2303 * The current sync rate used here uses only the most recent two step marks,
2304 * to have a short time average so we can react faster.
2305 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002306int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002307{
2308 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2309 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002310 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002311 int curr_events;
2312 int throttle = 0;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002313 unsigned int c_min_rate;
2314
2315 rcu_read_lock();
2316 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2317 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002318
2319 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002320 if (c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002321 return 0;
2322
Philipp Reisnere3555d82010-11-07 15:56:29 +01002323 spin_lock_irq(&mdev->al_lock);
2324 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2325 if (tmp) {
2326 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2327 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2328 spin_unlock_irq(&mdev->al_lock);
2329 return 0;
2330 }
2331 /* Do not slow down if app IO is already waiting for this extent */
2332 }
2333 spin_unlock_irq(&mdev->al_lock);
2334
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002335 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2336 (int)part_stat_read(&disk->part0, sectors[1]) -
2337 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002338
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002339 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2340 unsigned long rs_left;
2341 int i;
2342
2343 mdev->rs_last_events = curr_events;
2344
2345 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2346 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002347 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2348
2349 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2350 rs_left = mdev->ov_left;
2351 else
2352 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002353
2354 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2355 if (!dt)
2356 dt++;
2357 db = mdev->rs_mark_left[i] - rs_left;
2358 dbdt = Bit2KB(db/dt);
2359
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002360 if (dbdt > c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002361 throttle = 1;
2362 }
2363 return throttle;
2364}
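/* Illustrative, self-contained sketch (not part of the original file): the
 * rate test used above, reduced to plain arithmetic.  It covers only the
 * "current sync rate above c-min-rate" half of the decision and assumes one
 * bitmap bit covers 4 KiB (the usual BM_BLOCK_SIZE), so Bit2KB(x) is x * 4;
 * all names below are local to this sketch. */
static int sketch_should_throttle(unsigned long bits_synced,
				  unsigned long seconds,
				  unsigned int c_min_rate_kb)
{
	unsigned long dbdt;	/* achieved resync rate in KiB/s */

	if (c_min_rate_kb == 0)	/* feature disabled */
		return 0;
	if (seconds == 0)
		seconds = 1;
	dbdt = (bits_synced / seconds) * 4;	/* Bit2KB() for 4 KiB bits */
	return dbdt > c_min_rate_kb;		/* above c-min-rate: may throttle */
}
/* Example: 25600 bits (100 MiB) in 4 s is 25600 KiB/s; with a c-min-rate of
 * 4000 KiB/s the sketch returns 1, and the resync would be throttled if the
 * lower device also looks busy. */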
2365
2366
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002367static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002369 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002370 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002371 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002372 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002373 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002374 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002375 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002376 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002377
2378 mdev = vnr_to_mdev(tconn, pi->vnr);
2379 if (!mdev)
2380 return -EIO;
2381 capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002382
2383 sector = be64_to_cpu(p->sector);
2384 size = be32_to_cpu(p->blksize);
2385
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002386 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002387 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2388 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002389 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002390 }
2391 if (sector + (size>>9) > capacity) {
2392 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2393 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002394 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002395 }
2396
2397 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002398 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002399 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002400 case P_DATA_REQUEST:
2401 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2402 break;
2403 case P_RS_DATA_REQUEST:
2404 case P_CSUM_RS_REQUEST:
2405 case P_OV_REQUEST:
 2406		drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY, p);
2407 break;
2408 case P_OV_REPLY:
2409 verb = 0;
2410 dec_rs_pending(mdev);
2411 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2412 break;
2413 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002414 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002415 }
2416 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002417 dev_err(DEV, "Can not satisfy peer's read request, "
2418 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002419
Lars Ellenberga821cc42010-09-06 12:31:37 +02002420	/* drain the possibly remaining payload */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002421 return drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422 }
2423
2424 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2425 * "criss-cross" setup, that might cause write-out on some other DRBD,
2426 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02002427 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002428 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002429 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002430 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002431 }
2432
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002433 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002434 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002435 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002436 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002437 /* application IO, don't drbd_rs_begin_io */
2438 goto submit;
2439
Philipp Reisnerb411b362009-09-25 16:07:19 -07002440 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002441 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002442 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002443 /* used in the sector offset progress display */
2444 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002445 break;
2446
2447 case P_OV_REPLY:
2448 case P_CSUM_RS_REQUEST:
2449 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002450 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002451 if (!di)
2452 goto out_free_e;
2453
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002454 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002455 di->digest = (((char *)di)+sizeof(struct digest_info));
2456
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002457 peer_req->digest = di;
2458 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002459
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002460 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 goto out_free_e;
2462
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002463 if (pi->cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002464 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002465 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002466 /* used in the sector offset progress display */
2467 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002468 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002469 /* track progress, we may need to throttle */
2470 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002471 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002472 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002473 /* drbd_rs_begin_io done when we sent this request,
2474 * but accounting still needs to be done. */
2475 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002476 }
2477 break;
2478
2479 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002480 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002481 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002482 unsigned long now = jiffies;
2483 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002484 mdev->ov_start_sector = sector;
2485 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002486 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2487 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002488 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2489 mdev->rs_mark_left[i] = mdev->ov_left;
2490 mdev->rs_mark_time[i] = now;
2491 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002492 dev_info(DEV, "Online Verify start sector: %llu\n",
2493 (unsigned long long)sector);
2494 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002495 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002496 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002497 break;
2498
Philipp Reisnerb411b362009-09-25 16:07:19 -07002499 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002500 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002501 }
2502
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002503 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2504 * wrt the receiver, but it is not as straightforward as it may seem.
2505 * Various places in the resync start and stop logic assume resync
 2506 * requests are processed in order; requeuing this on the worker thread
 2507 * would introduce a bunch of new code for synchronization between threads.
2508 *
 2509 * Unlimited throttling before drbd_rs_begin_io may stall the resync
 2510 * "forever", while throttling after drbd_rs_begin_io will lock that extent
 2511 * against application writes for the same time. For now, just throttle
 2512 * here, where the rest of the code expects the receiver to sleep for
 2513 * a while anyway.
2514 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002515
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002516 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2517 * this defers syncer requests for some time, before letting at least
 2518 * one request through. The resync controller on the receiving side
2519 * will adapt to the incoming rate accordingly.
2520 *
2521 * We cannot throttle here if remote is Primary/SyncTarget:
2522 * we would also throttle its application reads.
2523 * In that case, throttling is done on the SyncTarget only.
2524 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002525 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2526 schedule_timeout_uninterruptible(HZ/10);
2527 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002528 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002529
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002530submit_for_resync:
2531 atomic_add(size >> 9, &mdev->rs_sect_ev);
2532
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002533submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002534 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002535 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002536 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002537 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002538
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002539 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002540 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002541
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002542 /* don't care for the reason here */
2543 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002544 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002545 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002546 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002547	/* no drbd_rs_complete_io(), we are dropping the connection anyway */
2548
Philipp Reisnerb411b362009-09-25 16:07:19 -07002549out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002550 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002551 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002552 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002553}
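/* Illustrative, self-contained sketch (not part of the original file): the
 * sanity checks receive_DataRequest() above applies to a peer's request
 * header.  max_bio stands in for DRBD_MAX_BIO_SIZE; sector and capacity are
 * in 512-byte sectors, size is in bytes. */
static int sketch_request_is_valid(unsigned long long sector, unsigned int size,
				   unsigned long long capacity, unsigned int max_bio)
{
	if (size == 0 || (size & 511) || size > max_bio)
		return 0;	/* not a positive multiple of 512 bytes, or too large */
	if (sector + (size >> 9) > capacity)
		return 0;	/* request reaches beyond the end of the device */
	return 1;
}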
2554
2555static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2556{
2557 int self, peer, rv = -100;
2558 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002559 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002560
2561 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2562 peer = mdev->p_uuid[UI_BITMAP] & 1;
2563
2564 ch_peer = mdev->p_uuid[UI_SIZE];
2565 ch_self = mdev->comm_bm_set;
2566
Philipp Reisner44ed1672011-04-19 17:10:19 +02002567 rcu_read_lock();
2568 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2569 rcu_read_unlock();
2570 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002571 case ASB_CONSENSUS:
2572 case ASB_DISCARD_SECONDARY:
2573 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002574 case ASB_VIOLENTLY:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002575 dev_err(DEV, "Configuration error.\n");
2576 break;
2577 case ASB_DISCONNECT:
2578 break;
2579 case ASB_DISCARD_YOUNGER_PRI:
2580 if (self == 0 && peer == 1) {
2581 rv = -1;
2582 break;
2583 }
2584 if (self == 1 && peer == 0) {
2585 rv = 1;
2586 break;
2587 }
2588 /* Else fall through to one of the other strategies... */
2589 case ASB_DISCARD_OLDER_PRI:
2590 if (self == 0 && peer == 1) {
2591 rv = 1;
2592 break;
2593 }
2594 if (self == 1 && peer == 0) {
2595 rv = -1;
2596 break;
2597 }
2598 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002599 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002600 "Using discard-least-changes instead\n");
2601 case ASB_DISCARD_ZERO_CHG:
2602 if (ch_peer == 0 && ch_self == 0) {
Lars Ellenberg427c0432012-08-01 12:43:01 +02002603 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002604 ? -1 : 1;
2605 break;
2606 } else {
2607 if (ch_peer == 0) { rv = 1; break; }
2608 if (ch_self == 0) { rv = -1; break; }
2609 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002610 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002611 break;
2612 case ASB_DISCARD_LEAST_CHG:
2613 if (ch_self < ch_peer)
2614 rv = -1;
2615 else if (ch_self > ch_peer)
2616 rv = 1;
2617 else /* ( ch_self == ch_peer ) */
2618 /* Well, then use something else. */
Lars Ellenberg427c0432012-08-01 12:43:01 +02002619 rv = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002620 ? -1 : 1;
2621 break;
2622 case ASB_DISCARD_LOCAL:
2623 rv = -1;
2624 break;
2625 case ASB_DISCARD_REMOTE:
2626 rv = 1;
2627 }
2628
2629 return rv;
2630}
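/* Illustrative, self-contained sketch (not part of the original file): the
 * core of the "discard-least-changes" policy above.  The return convention
 * follows drbd_asb_recover_0p(): -1 means sync from the peer (discard local
 * changes), 1 means the peer syncs from us.  resolve_conflicts stands in for
 * test_bit(RESOLVE_CONFLICTS, ...), an arbitrary but cluster-wide consistent
 * tie breaker. */
static int sketch_discard_least_changes(unsigned long ch_self,
					unsigned long ch_peer,
					int resolve_conflicts)
{
	if (ch_self < ch_peer)
		return -1;	/* we changed less: take the peer's data */
	if (ch_self > ch_peer)
		return 1;	/* peer changed less: peer takes ours */
	return resolve_conflicts ? -1 : 1;	/* tie: use the agreed tie breaker */
}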
2631
2632static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2633{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002634 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002635 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002636
Philipp Reisner44ed1672011-04-19 17:10:19 +02002637 rcu_read_lock();
2638 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2639 rcu_read_unlock();
2640 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002641 case ASB_DISCARD_YOUNGER_PRI:
2642 case ASB_DISCARD_OLDER_PRI:
2643 case ASB_DISCARD_LEAST_CHG:
2644 case ASB_DISCARD_LOCAL:
2645 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002646 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002647 dev_err(DEV, "Configuration error.\n");
2648 break;
2649 case ASB_DISCONNECT:
2650 break;
2651 case ASB_CONSENSUS:
2652 hg = drbd_asb_recover_0p(mdev);
2653 if (hg == -1 && mdev->state.role == R_SECONDARY)
2654 rv = hg;
2655 if (hg == 1 && mdev->state.role == R_PRIMARY)
2656 rv = hg;
2657 break;
2658 case ASB_VIOLENTLY:
2659 rv = drbd_asb_recover_0p(mdev);
2660 break;
2661 case ASB_DISCARD_SECONDARY:
2662 return mdev->state.role == R_PRIMARY ? 1 : -1;
2663 case ASB_CALL_HELPER:
2664 hg = drbd_asb_recover_0p(mdev);
2665 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002666 enum drbd_state_rv rv2;
2667
2668 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002669 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2670 * we might be here in C_WF_REPORT_PARAMS which is transient.
2671 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002672 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2673 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 drbd_khelper(mdev, "pri-lost-after-sb");
2675 } else {
2676 dev_warn(DEV, "Successfully gave up primary role.\n");
2677 rv = hg;
2678 }
2679 } else
2680 rv = hg;
2681 }
2682
2683 return rv;
2684}
2685
2686static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2687{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002688 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002689 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002690
Philipp Reisner44ed1672011-04-19 17:10:19 +02002691 rcu_read_lock();
2692 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2693 rcu_read_unlock();
2694 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002695 case ASB_DISCARD_YOUNGER_PRI:
2696 case ASB_DISCARD_OLDER_PRI:
2697 case ASB_DISCARD_LEAST_CHG:
2698 case ASB_DISCARD_LOCAL:
2699 case ASB_DISCARD_REMOTE:
2700 case ASB_CONSENSUS:
2701 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002702 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002703 dev_err(DEV, "Configuration error.\n");
2704 break;
2705 case ASB_VIOLENTLY:
2706 rv = drbd_asb_recover_0p(mdev);
2707 break;
2708 case ASB_DISCONNECT:
2709 break;
2710 case ASB_CALL_HELPER:
2711 hg = drbd_asb_recover_0p(mdev);
2712 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002713 enum drbd_state_rv rv2;
2714
Philipp Reisnerb411b362009-09-25 16:07:19 -07002715 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2716 * we might be here in C_WF_REPORT_PARAMS which is transient.
2717 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002718 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2719 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002720 drbd_khelper(mdev, "pri-lost-after-sb");
2721 } else {
2722 dev_warn(DEV, "Successfully gave up primary role.\n");
2723 rv = hg;
2724 }
2725 } else
2726 rv = hg;
2727 }
2728
2729 return rv;
2730}
2731
2732static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2733 u64 bits, u64 flags)
2734{
2735 if (!uuid) {
2736 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2737 return;
2738 }
2739 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2740 text,
2741 (unsigned long long)uuid[UI_CURRENT],
2742 (unsigned long long)uuid[UI_BITMAP],
2743 (unsigned long long)uuid[UI_HISTORY_START],
2744 (unsigned long long)uuid[UI_HISTORY_END],
2745 (unsigned long long)bits,
2746 (unsigned long long)flags);
2747}
2748
2749/*
2750 100 after split brain try auto recover
2751 2 C_SYNC_SOURCE set BitMap
2752 1 C_SYNC_SOURCE use BitMap
2753 0 no Sync
2754 -1 C_SYNC_TARGET use BitMap
2755 -2 C_SYNC_TARGET set BitMap
2756 -100 after split brain, disconnect
2757-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002758-1091 requires proto 91
2759-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002760 */
2761static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2762{
2763 u64 self, peer;
2764 int i, j;
2765
2766 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2767 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2768
2769 *rule_nr = 10;
2770 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2771 return 0;
2772
2773 *rule_nr = 20;
2774 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2775 peer != UUID_JUST_CREATED)
2776 return -2;
2777
2778 *rule_nr = 30;
2779 if (self != UUID_JUST_CREATED &&
2780 (peer == UUID_JUST_CREATED || peer == (u64)0))
2781 return 2;
2782
2783 if (self == peer) {
2784 int rct, dc; /* roles at crash time */
2785
2786 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2787
Philipp Reisner31890f42011-01-19 14:12:51 +01002788 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002789 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790
2791 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2792 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2793 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2794 drbd_uuid_set_bm(mdev, 0UL);
2795
2796 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2797 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2798 *rule_nr = 34;
2799 } else {
2800 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2801 *rule_nr = 36;
2802 }
2803
2804 return 1;
2805 }
2806
2807 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2808
Philipp Reisner31890f42011-01-19 14:12:51 +01002809 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002810 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002811
2812 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2813 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2814 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2815
2816 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2817 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2818 mdev->p_uuid[UI_BITMAP] = 0UL;
2819
2820 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2821 *rule_nr = 35;
2822 } else {
2823 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2824 *rule_nr = 37;
2825 }
2826
2827 return -1;
2828 }
2829
2830 /* Common power [off|failure] */
2831 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2832 (mdev->p_uuid[UI_FLAGS] & 2);
2833 /* lowest bit is set when we were primary,
2834 * next bit (weight 2) is set when peer was primary */
2835 *rule_nr = 40;
2836
2837 switch (rct) {
2838 case 0: /* !self_pri && !peer_pri */ return 0;
2839 case 1: /* self_pri && !peer_pri */ return 1;
2840 case 2: /* !self_pri && peer_pri */ return -1;
2841 case 3: /* self_pri && peer_pri */
Lars Ellenberg427c0432012-08-01 12:43:01 +02002842 dc = test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843 return dc ? -1 : 1;
2844 }
2845 }
2846
2847 *rule_nr = 50;
2848 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2849 if (self == peer)
2850 return -1;
2851
2852 *rule_nr = 51;
2853 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2854 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002855 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002856 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2857 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2858 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002859			/* The last P_SYNC_UUID did not get through. Undo the modifications of
 2860			   the peer's UUIDs made at the last start of a resync as sync source. */
2861
Philipp Reisner31890f42011-01-19 14:12:51 +01002862 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002863 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002864
2865 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2866 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002867
Lars Ellenberg1882e222012-05-07 13:09:00 +02002868 dev_info(DEV, "Lost last syncUUID packet, corrected:\n");
Philipp Reisner4a23f262011-01-11 17:42:17 +01002869 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2870
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871 return -1;
2872 }
2873 }
2874
2875 *rule_nr = 60;
2876 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2877 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2878 peer = mdev->p_uuid[i] & ~((u64)1);
2879 if (self == peer)
2880 return -2;
2881 }
2882
2883 *rule_nr = 70;
2884 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2885 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2886 if (self == peer)
2887 return 1;
2888
2889 *rule_nr = 71;
2890 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2891 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002892 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002893 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2894 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2895 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002896			/* The last P_SYNC_UUID did not get through. Undo the modifications of
 2897			   our UUIDs made at the last start of a resync as sync source. */
2898
Philipp Reisner31890f42011-01-19 14:12:51 +01002899 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002900 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002901
2902 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2903 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2904
Philipp Reisner4a23f262011-01-11 17:42:17 +01002905 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002906 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2907 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2908
2909 return 1;
2910 }
2911 }
2912
2913
2914 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002915 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002916 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2917 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2918 if (self == peer)
2919 return 2;
2920 }
2921
2922 *rule_nr = 90;
2923 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2924 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2925 if (self == peer && self != ((u64)0))
2926 return 100;
2927
2928 *rule_nr = 100;
2929 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2930 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2931 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2932 peer = mdev->p_uuid[j] & ~((u64)1);
2933 if (self == peer)
2934 return -100;
2935 }
2936 }
2937
2938 return -1000;
2939}
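/* Illustrative, self-contained sketch (not part of the original file): the
 * "common power failure" decision (rule 40) above, reduced to its switch.
 * Bit 0 of rct says we were primary at crash time, bit 1 (weight 2) says the
 * peer was; return values follow drbd_uuid_compare(): 1 = we become sync
 * source, -1 = sync target, 0 = no resync.  resolve_conflicts stands in for
 * the RESOLVE_CONFLICTS tie breaker. */
static int sketch_rule_40(int self_was_primary, int peer_was_primary,
			  int resolve_conflicts)
{
	int rct = (self_was_primary ? 1 : 0) + (peer_was_primary ? 2 : 0);

	switch (rct) {
	case 0: return 0;	/* both were secondary: nothing to do */
	case 1: return 1;	/* only we were primary: we have the newer data */
	case 2: return -1;	/* only the peer was primary */
	default: return resolve_conflicts ? -1 : 1;	/* both primary: tie break */
	}
}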
2940
2941/* drbd_sync_handshake() returns the new conn state on success, or
2942 CONN_MASK (-1) on failure.
2943 */
2944static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2945 enum drbd_disk_state peer_disk) __must_hold(local)
2946{
Philipp Reisnerb411b362009-09-25 16:07:19 -07002947 enum drbd_conns rv = C_MASK;
2948 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002949 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002950 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002951
2952 mydisk = mdev->state.disk;
2953 if (mydisk == D_NEGOTIATING)
2954 mydisk = mdev->new_state_tmp.disk;
2955
2956 dev_info(DEV, "drbd_sync_handshake:\n");
2957 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2958 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2959 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2960
2961 hg = drbd_uuid_compare(mdev, &rule_nr);
2962
2963 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2964
2965 if (hg == -1000) {
2966 dev_alert(DEV, "Unrelated data, aborting!\n");
2967 return C_MASK;
2968 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002969 if (hg < -1000) {
2970 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002971 return C_MASK;
2972 }
2973
2974 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2975 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2976 int f = (hg == -100) || abs(hg) == 2;
2977 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2978 if (f)
2979 hg = hg*2;
2980 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2981 hg > 0 ? "source" : "target");
2982 }
2983
Adam Gandelman3a11a482010-04-08 16:48:23 -07002984 if (abs(hg) == 100)
2985 drbd_khelper(mdev, "initial-split-brain");
2986
Philipp Reisner44ed1672011-04-19 17:10:19 +02002987 rcu_read_lock();
2988 nc = rcu_dereference(mdev->tconn->net_conf);
2989
2990 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991 int pcount = (mdev->state.role == R_PRIMARY)
2992 + (peer_role == R_PRIMARY);
2993 int forced = (hg == -100);
2994
2995 switch (pcount) {
2996 case 0:
2997 hg = drbd_asb_recover_0p(mdev);
2998 break;
2999 case 1:
3000 hg = drbd_asb_recover_1p(mdev);
3001 break;
3002 case 2:
3003 hg = drbd_asb_recover_2p(mdev);
3004 break;
3005 }
3006 if (abs(hg) < 100) {
3007 dev_warn(DEV, "Split-Brain detected, %d primaries, "
3008 "automatically solved. Sync from %s node\n",
3009 pcount, (hg < 0) ? "peer" : "this");
3010 if (forced) {
3011 dev_warn(DEV, "Doing a full sync, since"
 3012				 " UUIDs were ambiguous.\n");
3013 hg = hg*2;
3014 }
3015 }
3016 }
3017
3018 if (hg == -100) {
Philipp Reisner08b165b2011-09-05 16:22:33 +02003019 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003020 hg = -1;
Philipp Reisner08b165b2011-09-05 16:22:33 +02003021 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003022 hg = 1;
3023
3024 if (abs(hg) < 100)
3025 dev_warn(DEV, "Split-Brain detected, manually solved. "
3026 "Sync from %s node\n",
3027 (hg < 0) ? "peer" : "this");
3028 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003029 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003030 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003031 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003032
3033 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003034 /* FIXME this log message is not correct if we end up here
3035 * after an attempted attach on a diskless node.
3036 * We just refuse to attach -- well, we drop the "connection"
3037 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07003038 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003039 drbd_khelper(mdev, "split-brain");
3040 return C_MASK;
3041 }
3042
3043 if (hg > 0 && mydisk <= D_INCONSISTENT) {
3044 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
3045 return C_MASK;
3046 }
3047
3048 if (hg < 0 && /* by intention we do not use mydisk here. */
3049 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003050 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003051 case ASB_CALL_HELPER:
3052 drbd_khelper(mdev, "pri-lost");
3053 /* fall through */
3054 case ASB_DISCONNECT:
3055 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3056 return C_MASK;
3057 case ASB_VIOLENTLY:
3058 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
 3059			     " assumption\n");
3060 }
3061 }
3062
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003063 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003064 if (hg == 0)
3065 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3066 else
3067 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3068 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3069 abs(hg) >= 2 ? "full" : "bit-map based");
3070 return C_MASK;
3071 }
3072
Philipp Reisnerb411b362009-09-25 16:07:19 -07003073 if (abs(hg) >= 2) {
3074 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003075 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3076 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003077 return C_MASK;
3078 }
3079
3080 if (hg > 0) { /* become sync source. */
3081 rv = C_WF_BITMAP_S;
3082 } else if (hg < 0) { /* become sync target */
3083 rv = C_WF_BITMAP_T;
3084 } else {
3085 rv = C_CONNECTED;
3086 if (drbd_bm_total_weight(mdev)) {
3087 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3088 drbd_bm_total_weight(mdev));
3089 }
3090 }
3091
3092 return rv;
3093}
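/* Illustrative, self-contained sketch (not part of the original file): how
 * the handshake result hg is turned into the next connection state at the
 * end of drbd_sync_handshake().  The sign selects the resync direction, a
 * magnitude of 2 or more additionally requests a full sync (whole bitmap
 * set before the resync starts); the split-brain values +-100 have already
 * been resolved or rejected earlier.  State names are returned as strings
 * here only for illustration. */
static const char *sketch_hg_to_state(int hg, int *full_sync)
{
	*full_sync = (hg >= 2 || hg <= -2);	/* set whole bitmap before resync */
	if (hg > 0)
		return "WFBitMapS";	/* we become sync source */
	if (hg < 0)
		return "WFBitMapT";	/* we become sync target */
	return "Connected";		/* no resync needed */
}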
3094
Philipp Reisnerf179d762011-05-16 17:31:47 +02003095static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096{
3097 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003098 if (peer == ASB_DISCARD_REMOTE)
3099 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003100
3101 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003102 if (peer == ASB_DISCARD_LOCAL)
3103 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104
3105 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003106 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107}
3108
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003109static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003111 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003112 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3113 int p_proto, p_discard_my_data, p_two_primaries, cf;
3114 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3115 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003116 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003117 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003118
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119 p_proto = be32_to_cpu(p->protocol);
3120 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3121 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3122 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003123 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003124 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003125 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003126
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003127 if (tconn->agreed_pro_version >= 87) {
3128 int err;
3129
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003130 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003131 return -EIO;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003132 err = drbd_recv_all(tconn, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003133 if (err)
3134 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003135 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3136 }
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003137
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003138 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003139 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003140
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003141 if (cf & CF_DRY_RUN)
3142 set_bit(CONN_DRY_RUN, &tconn->flags);
3143
3144 rcu_read_lock();
3145 nc = rcu_dereference(tconn->net_conf);
3146
3147 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003148 conn_err(tconn, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003149 goto disconnect_rcu_unlock;
3150 }
3151
3152 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003153 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003154 goto disconnect_rcu_unlock;
3155 }
3156
3157 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003158 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003159 goto disconnect_rcu_unlock;
3160 }
3161
3162 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003163 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003164 goto disconnect_rcu_unlock;
3165 }
3166
3167 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003168 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003169 goto disconnect_rcu_unlock;
3170 }
3171
3172 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003173 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003174 goto disconnect_rcu_unlock;
3175 }
3176
3177 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003178 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003179 goto disconnect_rcu_unlock;
3180 }
3181
3182 rcu_read_unlock();
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003183 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003184
3185 if (integrity_alg[0]) {
3186 int hash_size;
3187
3188 /*
3189 * We can only change the peer data integrity algorithm
3190 * here. Changing our own data integrity algorithm
3191 * requires that we send a P_PROTOCOL_UPDATE packet at
 3192		 * the same time; otherwise, the peer has no way to
 3193		 * tell at which packet boundary the algorithm is
 3194		 * supposed to change.
3195 */
3196
3197 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3198 if (!peer_integrity_tfm) {
3199 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3200 integrity_alg);
3201 goto disconnect;
3202 }
3203
3204 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3205 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3206 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3207 if (!(int_dig_in && int_dig_vv)) {
3208 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3209 goto disconnect;
3210 }
3211 }
3212
3213 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3214 if (!new_net_conf) {
3215 conn_err(tconn, "Allocation of new net_conf failed\n");
3216 goto disconnect;
3217 }
3218
3219 mutex_lock(&tconn->data.mutex);
3220 mutex_lock(&tconn->conf_update);
3221 old_net_conf = tconn->net_conf;
3222 *new_net_conf = *old_net_conf;
3223
3224 new_net_conf->wire_protocol = p_proto;
3225 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3226 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3227 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3228 new_net_conf->two_primaries = p_two_primaries;
3229
3230 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3231 mutex_unlock(&tconn->conf_update);
3232 mutex_unlock(&tconn->data.mutex);
3233
3234 crypto_free_hash(tconn->peer_integrity_tfm);
3235 kfree(tconn->int_dig_in);
3236 kfree(tconn->int_dig_vv);
3237 tconn->peer_integrity_tfm = peer_integrity_tfm;
3238 tconn->int_dig_in = int_dig_in;
3239 tconn->int_dig_vv = int_dig_vv;
3240
3241 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3242 conn_info(tconn, "peer data-integrity-alg: %s\n",
3243 integrity_alg[0] ? integrity_alg : "(none)");
3244
3245 synchronize_rcu();
3246 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003247 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248
Philipp Reisner44ed1672011-04-19 17:10:19 +02003249disconnect_rcu_unlock:
3250 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003252 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003253 kfree(int_dig_in);
3254 kfree(int_dig_vv);
Philipp Reisner72046242011-03-15 18:51:47 +01003255 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003256 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257}
3258
3259/* helper function
3260 * input: alg name, feature name
3261 * return: NULL (alg name was "")
3262 * ERR_PTR(error) if something goes wrong
3263 * or the crypto hash ptr, if it worked out ok. */
3264struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3265 const char *alg, const char *name)
3266{
3267 struct crypto_hash *tfm;
3268
3269 if (!alg[0])
3270 return NULL;
3271
3272 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3273 if (IS_ERR(tfm)) {
3274 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3275 alg, name, PTR_ERR(tfm));
3276 return tfm;
3277 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003278 return tfm;
3279}
3280
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003281static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003282{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003283 void *buffer = tconn->data.rbuf;
3284 int size = pi->size;
3285
3286 while (size) {
3287 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3288 s = drbd_recv(tconn, buffer, s);
3289 if (s <= 0) {
3290 if (s < 0)
3291 return s;
3292 break;
3293 }
3294 size -= s;
3295 }
3296 if (size)
3297 return -EIO;
3298 return 0;
3299}
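/* Illustrative, self-contained sketch (not part of the original file):
 * draining a remaining payload of "size" bytes in receive-buffer sized
 * chunks, as ignore_remaining_packet() above does.  recv_some() is a
 * stand-in for drbd_recv() and is assumed to return the number of bytes
 * received, 0 on connection close, or a negative error. */
static int sketch_drain(int (*recv_some)(void *buf, int len),
			void *buf, int bufsize, int size)
{
	while (size) {
		int s = size < bufsize ? size : bufsize;

		s = recv_some(buf, s);
		if (s <= 0)
			return s < 0 ? s : -1;	/* error, or closed before the payload ended */
		size -= s;
	}
	return 0;
}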
3300
3301/*
3302 * config_unknown_volume - device configuration command for unknown volume
3303 *
3304 * When a device is added to an existing connection, the node on which the
3305 * device is added first will send configuration commands to its peer but the
3306 * peer will not know about the device yet. It will warn and ignore these
3307 * commands. Once the device is added on the second node, the second node will
3308 * send the same device configuration commands, but in the other direction.
3309 *
3310 * (We can also end up here if drbd is misconfigured.)
3311 */
3312static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3313{
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003314 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
3315 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003316 return ignore_remaining_packet(tconn, pi);
3317}
3318
3319static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3320{
3321 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003322 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323 unsigned int header_size, data_size, exp_max_sz;
3324 struct crypto_hash *verify_tfm = NULL;
3325 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003326 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003327 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003328 const int apv = tconn->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003329 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003330 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003331 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003332
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003333 mdev = vnr_to_mdev(tconn, pi->vnr);
3334 if (!mdev)
3335 return config_unknown_volume(tconn, pi);
3336
Philipp Reisnerb411b362009-09-25 16:07:19 -07003337 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3338 : apv == 88 ? sizeof(struct p_rs_param)
3339 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003340 : apv <= 94 ? sizeof(struct p_rs_param_89)
3341 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003343 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003344 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003345 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003346 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003347 }
3348
3349 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003350 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003351 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003352 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003353 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003354 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003355 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003356 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003357 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003358 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003359 D_ASSERT(data_size == 0);
3360 }
3361
3362 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003363 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003364 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3365
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003366 err = drbd_recv_all(mdev->tconn, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003367 if (err)
3368 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003369
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003370 mutex_lock(&mdev->tconn->conf_update);
3371 old_net_conf = mdev->tconn->net_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003372 if (get_ldev(mdev)) {
3373 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3374 if (!new_disk_conf) {
3375 put_ldev(mdev);
3376 mutex_unlock(&mdev->tconn->conf_update);
3377 dev_err(DEV, "Allocation of new disk_conf failed\n");
3378 return -ENOMEM;
3379 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003380
Philipp Reisner813472c2011-05-03 16:47:02 +02003381 old_disk_conf = mdev->ldev->disk_conf;
3382 *new_disk_conf = *old_disk_conf;
3383
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003384 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003385 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003386
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387 if (apv >= 88) {
3388 if (apv == 88) {
Philipp Reisnere4bad1b2012-04-06 12:08:51 +02003389 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
3390 dev_err(DEV, "verify-alg of wrong size, "
3391 "peer wants %u, accepting only up to %u byte\n",
3392 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003393 err = -EIO;
3394 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003395 }
3396
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003397 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003398 if (err)
3399 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400 /* we expect NUL terminated string */
3401 /* but just in case someone tries to be evil */
3402 D_ASSERT(p->verify_alg[data_size-1] == 0);
3403 p->verify_alg[data_size-1] = 0;
3404
3405 } else /* apv >= 89 */ {
3406 /* we still expect NUL terminated strings */
3407 /* but just in case someone tries to be evil */
3408 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3409 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3410 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3411 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3412 }
3413
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003414 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3416 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003417 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003418 goto disconnect;
3419 }
3420 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3421 p->verify_alg, "verify-alg");
3422 if (IS_ERR(verify_tfm)) {
3423 verify_tfm = NULL;
3424 goto disconnect;
3425 }
3426 }
3427
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003428 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3430 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003431 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003432 goto disconnect;
3433 }
3434 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3435 p->csums_alg, "csums-alg");
3436 if (IS_ERR(csums_tfm)) {
3437 csums_tfm = NULL;
3438 goto disconnect;
3439 }
3440 }
3441
Philipp Reisner813472c2011-05-03 16:47:02 +02003442 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003443 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3444 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3445 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3446 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003447
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003448 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner9958c852011-05-03 16:19:31 +02003449 if (fifo_size != mdev->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003450 new_plan = fifo_alloc(fifo_size);
3451 if (!new_plan) {
Philipp Reisner778f2712010-07-06 11:14:00 +02003452 dev_err(DEV, "kmalloc of fifo_buffer failed");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003453 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003454 goto disconnect;
3455 }
3456 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003457 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003458
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003459 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003460 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3461 if (!new_net_conf) {
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003462 dev_err(DEV, "Allocation of new net_conf failed\n");
3463 goto disconnect;
3464 }
3465
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003466 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003467
3468 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003469 strcpy(new_net_conf->verify_alg, p->verify_alg);
3470 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003471 crypto_free_hash(mdev->tconn->verify_tfm);
3472 mdev->tconn->verify_tfm = verify_tfm;
3473 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3474 }
3475 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003476 strcpy(new_net_conf->csums_alg, p->csums_alg);
3477 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003478 crypto_free_hash(mdev->tconn->csums_tfm);
3479 mdev->tconn->csums_tfm = csums_tfm;
3480 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3481 }
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003482 rcu_assign_pointer(tconn->net_conf, new_net_conf);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003483 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003484 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003485
Philipp Reisner813472c2011-05-03 16:47:02 +02003486 if (new_disk_conf) {
3487 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3488 put_ldev(mdev);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003489 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003490
3491 if (new_plan) {
3492 old_plan = mdev->rs_plan_s;
3493 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
3494 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003495
3496 mutex_unlock(&mdev->tconn->conf_update);
3497 synchronize_rcu();
3498 if (new_net_conf)
3499 kfree(old_net_conf);
3500 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003501 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003502
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003503 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504
Philipp Reisner813472c2011-05-03 16:47:02 +02003505reconnect:
3506 if (new_disk_conf) {
3507 put_ldev(mdev);
3508 kfree(new_disk_conf);
3509 }
3510 mutex_unlock(&mdev->tconn->conf_update);
3511 return -EIO;
3512
Philipp Reisnerb411b362009-09-25 16:07:19 -07003513disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003514 kfree(new_plan);
3515 if (new_disk_conf) {
3516 put_ldev(mdev);
3517 kfree(new_disk_conf);
3518 }
Philipp Reisnera0095502011-05-03 13:14:15 +02003519 mutex_unlock(&mdev->tconn->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003520 /* just for completeness: actually not needed,
3521 * as this is not reached if csums_tfm was ok. */
3522 crypto_free_hash(csums_tfm);
3523 /* but free the verify_tfm again, if csums_tfm did not work out */
3524 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003525 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003526 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527}
3528
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529/* warn if the arguments differ by more than 12.5% */
3530static void warn_if_differ_considerably(struct drbd_conf *mdev,
3531 const char *s, sector_t a, sector_t b)
3532{
3533 sector_t d;
3534 if (a == 0 || b == 0)
3535 return;
3536 d = (a > b) ? (a - b) : (b - a);
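	/* a>>3 is a/8, so this warns once the difference exceeds 12.5% of either value */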
3537 if (d > (a>>3) || d > (b>>3))
3538 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3539 (unsigned long long)a, (unsigned long long)b);
3540}
3541
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003542static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003543{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003544 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003545 struct p_sizes *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003547 sector_t p_size, p_usize, my_usize;
3548 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003549 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003550
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003551 mdev = vnr_to_mdev(tconn, pi->vnr);
3552 if (!mdev)
3553 return config_unknown_volume(tconn, pi);
3554
Philipp Reisnerb411b362009-09-25 16:07:19 -07003555 p_size = be64_to_cpu(p->d_size);
3556 p_usize = be64_to_cpu(p->u_size);
3557
Philipp Reisnerb411b362009-09-25 16:07:19 -07003558 /* just store the peer's disk size for now.
3559 * we still need to figure out whether we accept that. */
3560 mdev->p_size = p_size;
3561
Philipp Reisnerb411b362009-09-25 16:07:19 -07003562 if (get_ldev(mdev)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003563 rcu_read_lock();
3564 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3565 rcu_read_unlock();
3566
Philipp Reisnerb411b362009-09-25 16:07:19 -07003567 warn_if_differ_considerably(mdev, "lower level device sizes",
3568 p_size, drbd_get_max_capacity(mdev->ldev));
3569 warn_if_differ_considerably(mdev, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003570 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003571
3572 /* if this is the first connect, or an otherwise expected
3573 * param exchange, choose the minimum */
3574 if (mdev->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003575 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576
3577 /* Never shrink a device with usable data during connect.
3578 But allow online shrinking if we are connected. */
Philipp Reisneref5e44a2011-05-03 13:27:43 +02003579 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003580 drbd_get_capacity(mdev->this_bdev) &&
3581 mdev->state.disk >= D_OUTDATED &&
3582 mdev->state.conn < C_CONNECTED) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003583 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003584 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003585 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003586 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003588
3589 if (my_usize != p_usize) {
3590 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3591
3592 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3593 if (!new_disk_conf) {
3594 dev_err(DEV, "Allocation of new disk_conf failed\n");
3595 put_ldev(mdev);
3596 return -ENOMEM;
3597 }
3598
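			/* publish the new disk_conf: updates are serialized by conf_update,
			 * readers use RCU, so the old object may only be freed after a
			 * grace period (synchronize_rcu() below) */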
3599 mutex_lock(&mdev->tconn->conf_update);
3600 old_disk_conf = mdev->ldev->disk_conf;
3601 *new_disk_conf = *old_disk_conf;
3602 new_disk_conf->disk_size = p_usize;
3603
3604 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3605 mutex_unlock(&mdev->tconn->conf_update);
3606 synchronize_rcu();
3607 kfree(old_disk_conf);
3608
3609 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
 3610 				 (unsigned long)p_usize);
3611 }
3612
Philipp Reisnerb411b362009-09-25 16:07:19 -07003613 put_ldev(mdev);
3614 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003615
Philipp Reisnere89b5912010-03-24 17:11:33 +01003616 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003617 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003618 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619 put_ldev(mdev);
3620 if (dd == dev_size_error)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003621 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003622 drbd_md_sync(mdev);
3623 } else {
3624 /* I am diskless, need to accept the peer's size. */
3625 drbd_set_my_capacity(mdev, p_size);
3626 }
3627
Philipp Reisner99432fc2011-05-20 16:39:13 +02003628 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3629 drbd_reconsider_max_bio_size(mdev);
3630
Philipp Reisnerb411b362009-09-25 16:07:19 -07003631 if (get_ldev(mdev)) {
3632 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3633 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3634 ldsc = 1;
3635 }
3636
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637 put_ldev(mdev);
3638 }
3639
3640 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3641 if (be64_to_cpu(p->c_size) !=
3642 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3643 /* we have different sizes, probably peer
3644 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003645 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003646 }
3647 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3648 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3649 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003650 mdev->state.disk >= D_INCONSISTENT) {
3651 if (ddsf & DDSF_NO_RESYNC)
3652 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3653 else
3654 resync_after_online_grow(mdev);
3655 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003656 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3657 }
3658 }
3659
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003660 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661}
3662
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003663static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003664{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003665 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003666 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003668 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003669
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003670 mdev = vnr_to_mdev(tconn, pi->vnr);
3671 if (!mdev)
3672 return config_unknown_volume(tconn, pi);
3673
Philipp Reisnerb411b362009-09-25 16:07:19 -07003674 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
3675
3676 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3677 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3678
3679 kfree(mdev->p_uuid);
3680 mdev->p_uuid = p_uuid;
3681
3682 if (mdev->state.conn < C_CONNECTED &&
3683 mdev->state.disk < D_INCONSISTENT &&
3684 mdev->state.role == R_PRIMARY &&
3685 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3686 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3687 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003688 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003689 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690 }
3691
3692 if (get_ldev(mdev)) {
3693 int skip_initial_sync =
3694 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003695 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3697 (p_uuid[UI_FLAGS] & 8);
3698 if (skip_initial_sync) {
3699 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3700 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003701 "clear_n_write from receive_uuids",
3702 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3704 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3705 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3706 CS_VERBOSE, NULL);
3707 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003708 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709 }
3710 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003711 } else if (mdev->state.disk < D_INCONSISTENT &&
3712 mdev->state.role == R_PRIMARY) {
3713 /* I am a diskless primary, the peer just created a new current UUID
3714 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003715 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716 }
3717
 3718	/* Before we test for the disk state, we should wait until any possibly
 3719	   ongoing cluster-wide state change has finished. That is important if
3720 we are primary and are detaching from our disk. We need to see the
3721 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003722 mutex_lock(mdev->state_mutex);
3723 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003724 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003725 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3726
3727 if (updated_uuids)
3728 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003729
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003730 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731}
3732
3733/**
3734 * convert_state() - Converts the peer's view of the cluster state to our point of view
3735 * @ps: The state as seen by the peer.
3736 */
3737static union drbd_state convert_state(union drbd_state ps)
3738{
3739 union drbd_state ms;
3740
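	/* asymmetric connection states (sync/verify source vs. target) are
	 * mirrored to the local point of view; symmetric states map to themselves */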
3741 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003742 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003743 [C_CONNECTED] = C_CONNECTED,
3744
3745 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3746 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3747 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3748 [C_VERIFY_S] = C_VERIFY_T,
3749 [C_MASK] = C_MASK,
3750 };
3751
3752 ms.i = ps.i;
3753
3754 ms.conn = c_tab[ps.conn];
3755 ms.peer = ps.role;
3756 ms.role = ps.peer;
3757 ms.pdsk = ps.disk;
3758 ms.disk = ps.pdsk;
3759 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3760
3761 return ms;
3762}
3763
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003764static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003765{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003766 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003767 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003768 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003769 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003771 mdev = vnr_to_mdev(tconn, pi->vnr);
3772 if (!mdev)
3773 return -EIO;
3774
Philipp Reisnerb411b362009-09-25 16:07:19 -07003775 mask.i = be32_to_cpu(p->mask);
3776 val.i = be32_to_cpu(p->val);
3777
Lars Ellenberg427c0432012-08-01 12:43:01 +02003778 if (test_bit(RESOLVE_CONFLICTS, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003779 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003781 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003782 }
3783
3784 mask = convert_state(mask);
3785 val = convert_state(val);
3786
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003787 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3788 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003789
Philipp Reisnerb411b362009-09-25 16:07:19 -07003790 drbd_md_sync(mdev);
3791
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003792 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003793}
3794
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003795static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003796{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003797 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003798 union drbd_state mask, val;
3799 enum drbd_state_rv rv;
3800
3801 mask.i = be32_to_cpu(p->mask);
3802 val.i = be32_to_cpu(p->val);
3803
Lars Ellenberg427c0432012-08-01 12:43:01 +02003804 if (test_bit(RESOLVE_CONFLICTS, &tconn->flags) &&
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003805 mutex_is_locked(&tconn->cstate_mutex)) {
3806 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003807 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003808 }
3809
3810 mask = convert_state(mask);
3811 val = convert_state(val);
3812
Philipp Reisner778bcf22011-03-28 12:55:03 +02003813 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003814 conn_send_sr_reply(tconn, rv);
3815
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003816 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003817}
3818
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003819static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003821 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003822 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003823 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003824 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003825 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003826 int rv;
3827
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003828 mdev = vnr_to_mdev(tconn, pi->vnr);
3829 if (!mdev)
3830 return config_unknown_volume(tconn, pi);
3831
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832 peer_state.i = be32_to_cpu(p->state);
3833
3834 real_peer_disk = peer_state.disk;
3835 if (peer_state.disk == D_NEGOTIATING) {
3836 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3837 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3838 }
3839
Philipp Reisner87eeee42011-01-19 14:16:30 +01003840 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841 retry:
Philipp Reisner78bae592011-03-28 15:40:12 +02003842 os = ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003843 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844
Philipp Reisnerb8853db2011-12-13 11:09:16 +01003845 /* If some other part of the code (asender thread, timeout)
3846 * already decided to close the connection again,
3847 * we must not "re-establish" it here. */
3848 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003849 return -ECONNRESET;
Philipp Reisnerb8853db2011-12-13 11:09:16 +01003850
Philipp Reisner9bcd2522011-09-29 13:00:14 +02003851 /* If this is the "end of sync" confirmation, usually the peer disk
 3852	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For an empty resync
 3853	 * (0 bits set) that started in PausedSyncT, or if the timing of pause-/
3854 * unpause-sync events has been "just right", the peer disk may
3855 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3856 */
3857 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3858 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003859 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3860 /* If we are (becoming) SyncSource, but peer is still in sync
3861 * preparation, ignore its uptodate-ness to avoid flapping, it
3862 * will change to inconsistent once the peer reaches active
3863 * syncing states.
3864 * It may have changed syncer-paused flags, however, so we
3865 * cannot ignore this completely. */
3866 if (peer_state.conn > C_CONNECTED &&
3867 peer_state.conn < C_SYNC_SOURCE)
3868 real_peer_disk = D_INCONSISTENT;
3869
3870 /* if peer_state changes to connected at the same time,
3871 * it explicitly notifies us that it finished resync.
3872 * Maybe we should finish it up, too? */
3873 else if (os.conn >= C_SYNC_SOURCE &&
3874 peer_state.conn == C_CONNECTED) {
3875 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3876 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003877 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003878 }
3879 }
3880
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003881 /* explicit verify finished notification, stop sector reached. */
3882 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3883 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
3884 ov_out_of_sync_print(mdev);
3885 drbd_resync_finished(mdev);
3886 return 0;
3887 }
3888
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003889 /* peer says his disk is inconsistent, while we think it is uptodate,
3890 * and this happens while the peer still thinks we have a sync going on,
3891 * but we think we are already done with the sync.
3892 * We ignore this to avoid flapping pdsk.
 3893	 * This should not happen if the peer is a recent version of drbd. */
3894 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3895 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3896 real_peer_disk = D_UP_TO_DATE;
3897
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003898 if (ns.conn == C_WF_REPORT_PARAMS)
3899 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003900
Philipp Reisner67531712010-10-27 12:21:30 +02003901 if (peer_state.conn == C_AHEAD)
3902 ns.conn = C_BEHIND;
3903
Philipp Reisnerb411b362009-09-25 16:07:19 -07003904 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3905 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3906 int cr; /* consider resync */
3907
3908 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003909 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003910 /* if we had an established connection
3911 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003912 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003913 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003914 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915 /* if we have both been inconsistent, and the peer has been
3916 * forced to be UpToDate with --overwrite-data */
3917 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3918 /* if we had been plain connected, and the admin requested to
3919 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003920 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003921 (peer_state.conn >= C_STARTING_SYNC_S &&
3922 peer_state.conn <= C_WF_BITMAP_T));
3923
3924 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003925 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926
3927 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003928 if (ns.conn == C_MASK) {
3929 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003930 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003931 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932 } else if (peer_state.disk == D_NEGOTIATING) {
3933 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3934 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003935 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003936 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003937 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003938 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003939 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003940 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003941 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003942 }
3943 }
3944 }
3945
Philipp Reisner87eeee42011-01-19 14:16:30 +01003946 spin_lock_irq(&mdev->tconn->req_lock);
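	/* the state may have been changed by others while we worked on it
	 * without holding req_lock; if so, start over */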
Philipp Reisner78bae592011-03-28 15:40:12 +02003947 if (os.i != drbd_read_state(mdev).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003948 goto retry;
3949 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003950 ns.peer = peer_state.role;
3951 ns.pdsk = real_peer_disk;
3952 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003953 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003954 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003955 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Philipp Reisner2aebfab2011-03-28 16:48:11 +02003956 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003957 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003958 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003959		   for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003960 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003961 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003962 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003963 drbd_uuid_new_current(mdev);
3964 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003965 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003966 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003967 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003968 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisner78bae592011-03-28 15:40:12 +02003969 ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003970 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003971
3972 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003973 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003974 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003975 }
3976
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003977 if (os.conn > C_WF_REPORT_PARAMS) {
3978 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979 peer_state.disk != D_NEGOTIATING ) {
3980 /* we want resync, peer has not yet decided to sync... */
3981 /* Nowadays only used when forcing a node into primary role and
3982 setting its disk to UpToDate with that */
3983 drbd_send_uuids(mdev);
Philipp Reisner43de7c82011-11-10 13:16:13 +01003984 drbd_send_current_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003985 }
3986 }
3987
Philipp Reisner08b165b2011-09-05 16:22:33 +02003988 clear_bit(DISCARD_MY_DATA, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989
3990 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3991
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003992 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993}
3994
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003995static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003996{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003997 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003998 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003999
4000 mdev = vnr_to_mdev(tconn, pi->vnr);
4001 if (!mdev)
4002 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004003
4004 wait_event(mdev->misc_wait,
4005 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02004006 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007 mdev->state.conn < C_CONNECTED ||
4008 mdev->state.disk < D_NEGOTIATING);
4009
4010 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
4011
Philipp Reisnerb411b362009-09-25 16:07:19 -07004012 /* Here the _drbd_uuid_ functions are right, current should
4013 _not_ be rotated into the history */
4014 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
4015 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
4016 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
4017
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004018 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004019 drbd_start_resync(mdev, C_SYNC_TARGET);
4020
4021 put_ldev(mdev);
4022 } else
4023 dev_err(DEV, "Ignoring SyncUUID packet!\n");
4024
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004025 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004026}
4027
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004028/**
4029 * receive_bitmap_plain
4030 *
4031 * Return 0 when done, 1 when another iteration is needed, and a negative error
4032 * code upon failure.
4033 */
4034static int
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004035receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004036 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004038 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
4039 drbd_header_size(mdev->tconn);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004040 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004041 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004042 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004043 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004045 if (want != size) {
4046 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004047 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004048 }
4049 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004050 return 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004051 err = drbd_recv_all(mdev->tconn, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004052 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004053 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004055 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004056
4057 c->word_offset += num_words;
4058 c->bit_offset = c->word_offset * BITS_PER_LONG;
4059 if (c->bit_offset > c->bm_bits)
4060 c->bit_offset = c->bm_bits;
4061
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004062 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004063}
4064
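/* The 'encoding' byte of a compressed bitmap packet packs three fields:
 * bits 0-3 hold the bitmap encoding code, bits 4-6 the number of pad bits,
 * and bit 7 tells whether the first run-length describes set bits. */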
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004065static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4066{
4067 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4068}
4069
4070static int dcbp_get_start(struct p_compressed_bm *p)
4071{
4072 return (p->encoding & 0x80) != 0;
4073}
4074
4075static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4076{
4077 return (p->encoding >> 4) & 0x7;
4078}
4079
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004080/**
4081 * recv_bm_rle_bits
4082 *
4083 * Return 0 when done, 1 when another iteration is needed, and a negative error
4084 * code upon failure.
4085 */
4086static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004087recv_bm_rle_bits(struct drbd_conf *mdev,
4088 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004089 struct bm_xfer_ctx *c,
4090 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004091{
4092 struct bitstream bs;
4093 u64 look_ahead;
4094 u64 rl;
4095 u64 tmp;
4096 unsigned long s = c->bit_offset;
4097 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004098 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004099 int have;
4100 int bits;
4101
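	/* Run lengths are VLI encoded and alternate between runs of clear and
	 * of set bits, starting with the type given by the packet's start
	 * toggle; only the runs of set bits are applied to the bitmap. */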
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004102 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103
4104 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4105 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004106 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004107
4108 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4109 bits = vli_decode_bits(&rl, look_ahead);
4110 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004111 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112
4113 if (toggle) {
4114 e = s + rl -1;
4115 if (e >= c->bm_bits) {
4116 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004117 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118 }
4119 _drbd_bm_set_bits(mdev, s, e);
4120 }
4121
4122 if (have < bits) {
4123 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4124 have, bits, look_ahead,
4125 (unsigned int)(bs.cur.b - p->code),
4126 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004127 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004128 }
4129 look_ahead >>= bits;
4130 have -= bits;
4131
4132 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4133 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004134 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004135 look_ahead |= tmp << have;
4136 have += bits;
4137 }
4138
4139 c->bit_offset = s;
4140 bm_xfer_ctx_bit_to_word_offset(c);
4141
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004142 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004143}
4144
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004145/**
4146 * decode_bitmap_c
4147 *
4148 * Return 0 when done, 1 when another iteration is needed, and a negative error
4149 * code upon failure.
4150 */
4151static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004152decode_bitmap_c(struct drbd_conf *mdev,
4153 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004154 struct bm_xfer_ctx *c,
4155 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004157 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004158 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004159
4160 /* other variants had been implemented for evaluation,
4161 * but have been dropped as this one turned out to be "best"
4162 * during all our tests. */
4163
4164 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01004165 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004166 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167}
4168
4169void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4170 const char *direction, struct bm_xfer_ctx *c)
4171{
4172 /* what would it take to transfer it "plaintext" */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004173 unsigned int header_size = drbd_header_size(mdev->tconn);
4174 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4175 unsigned int plain =
4176 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4177 c->bm_words * sizeof(unsigned long);
4178 unsigned int total = c->bytes[0] + c->bytes[1];
4179 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180
 4181	/* total cannot be zero, but just in case: */
4182 if (total == 0)
4183 return;
4184
4185 /* don't report if not compressed */
4186 if (total >= plain)
4187 return;
4188
4189 /* total < plain. check for overflow, still */
4190 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4191 : (1000 * total / plain);
4192
4193 if (r > 1000)
4194 r = 1000;
4195
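	/* r is the compressed/plain ratio in per mille; 1000 - r is the
	 * saving, printed below as a percentage with one decimal place */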
4196 r = 1000 - r;
4197 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4198 "total %u; compression: %u.%u%%\n",
4199 direction,
4200 c->bytes[1], c->packets[1],
4201 c->bytes[0], c->packets[0],
4202 total, r/10, r % 10);
4203}
4204
 4205/* Since we are processing the bitfield from lower addresses to higher,
 4206   it does not matter whether we process it in 32 bit or 64 bit
 4207   chunks, as long as it is little endian. (Understand it as a byte stream,
 4208   beginning with the lowest byte...) If we used big endian,
 4209   we would need to process it from the highest address to the lowest
 4210   in order to be agnostic to the 32 vs 64 bit issue.
 4211
 4212   Returns 0 on success, a negative error code otherwise. */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004213static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004214{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004215 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004217 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004218
4219 mdev = vnr_to_mdev(tconn, pi->vnr);
4220 if (!mdev)
4221 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004222
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004223 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
4224 /* you are supposed to send additional out-of-sync information
4225 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004226
Philipp Reisnerb411b362009-09-25 16:07:19 -07004227 c = (struct bm_xfer_ctx) {
4228 .bm_bits = drbd_bm_bits(mdev),
4229 .bm_words = drbd_bm_words(mdev),
4230 };
4231
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004232 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004233 if (pi->cmd == P_BITMAP)
4234 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
4235 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004236 /* MAYBE: sanity check that we speak proto >= 90,
4237 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004238 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004239
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004240 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004242 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243 goto out;
4244 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004245 if (pi->size <= sizeof(*p)) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004246 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004247 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004248 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004250 err = drbd_recv_all(mdev->tconn, p, pi->size);
4251 if (err)
4252 goto out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004253 err = decode_bitmap_c(mdev, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004254 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004255 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004256 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257 goto out;
4258 }
4259
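		/* index 1 counts plain P_BITMAP packets, index 0 compressed
		 * ones, matching the order printed by INFO_bm_xfer_stats() */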
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004260 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004261 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004263 if (err <= 0) {
4264 if (err < 0)
4265 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004267 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004268 err = drbd_recv_header(mdev->tconn, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004269 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004271 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272
4273 INFO_bm_xfer_stats(mdev, "receive", &c);
4274
4275 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004276 enum drbd_state_rv rv;
4277
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004278 err = drbd_send_bitmap(mdev);
4279 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004280 goto out;
4281 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004282 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
4283 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004284 } else if (mdev->state.conn != C_WF_BITMAP_S) {
4285 /* admin may have requested C_DISCONNECTING,
4286 * other threads may have noticed network errors */
4287 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
4288 drbd_conn_str(mdev->state.conn));
4289 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004290 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004291
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004293 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004294 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004295 drbd_start_resync(mdev, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004296 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297}
4298
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004299static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004300{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004301 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004302 pi->cmd, pi->size);
Philipp Reisner2de876e2011-03-15 14:38:01 +01004303
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004304 return ignore_remaining_packet(tconn, pi);
Philipp Reisner2de876e2011-03-15 14:38:01 +01004305}
4306
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004307static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004309 /* Make sure we've acked all the TCP data associated
4310 * with the data requests being unplugged */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004311 drbd_tcp_quickack(tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004312
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004313 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314}
4315
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004316static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004317{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004318 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004319 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004320
4321 mdev = vnr_to_mdev(tconn, pi->vnr);
4322 if (!mdev)
4323 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004324
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004325 switch (mdev->state.conn) {
4326 case C_WF_SYNC_UUID:
4327 case C_WF_BITMAP_T:
4328 case C_BEHIND:
4329 break;
4330 default:
4331 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4332 drbd_conn_str(mdev->state.conn));
4333 }
4334
Philipp Reisner73a01a12010-10-27 14:33:00 +02004335 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4336
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004337 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004338}
4339
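/* Dispatch table entry for drbdd(): pkt_size is the fixed (sub-)header that
 * is read into pi.data before fn() is called; expect_payload allows the
 * packet to carry additional payload beyond that size. */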
Philipp Reisner02918be2010-08-20 14:35:10 +02004340struct data_cmd {
4341 int expect_payload;
4342 size_t pkt_size;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004343 int (*fn)(struct drbd_tconn *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344};
4345
Philipp Reisner02918be2010-08-20 14:35:10 +02004346static struct data_cmd drbd_cmd_handler[] = {
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004347 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4348 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4349 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4350 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004351 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4352 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4353 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004354 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4355 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004356 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4357 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004358 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4359 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4360 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4361 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4362 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4363 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4364 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4365 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4366 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4367 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
4368 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4369 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004370 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004371};
4372
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004373static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004375 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004376 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004377 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004379 while (get_t_state(&tconn->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004380 struct data_cmd *cmd;
4381
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004382 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004383 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004384 goto err_out;
4385
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004386 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004387 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004388 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4389 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004390 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004391 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004392
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004393 shs = cmd->pkt_size;
4394 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004395 conn_err(tconn, "No payload expected %s l:%d\n",
4396 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004397 goto err_out;
4398 }
4399
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004400 if (shs) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004401 err = drbd_recv_all_warn(tconn, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004402 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004403 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004404 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004405 }
4406
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004407 err = cmd->fn(tconn, &pi);
4408 if (err) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004409 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
4410 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004411 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004412 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004413 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004414 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004416 err_out:
4417 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418}
4419
Philipp Reisner0e29d162011-02-18 14:23:11 +01004420void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421{
4422 struct drbd_wq_barrier barr;
4423
4424 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004425 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426 init_completion(&barr.done);
Lars Ellenbergd5b27b02011-11-14 15:42:37 +01004427 drbd_queue_work(&tconn->sender_work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004428 wait_for_completion(&barr.done);
4429}
4430
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004431static void conn_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432{
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004433 struct drbd_conf *mdev;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004434 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004435 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004437 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004438 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004439
Philipp Reisnerb8853db2011-12-13 11:09:16 +01004440 /* We are about to start the cleanup after connection loss.
4441 * Make sure drbd_make_request knows about that.
4442 * Usually we should be in some network failure state already,
4443 * but just in case we are not, we fix it up here.
4444 */
4445 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
4446
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004448 drbd_thread_stop(&tconn->asender);
4449 drbd_free_sock(tconn);
4450
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004451 rcu_read_lock();
4452 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4453 kref_get(&mdev->kref);
4454 rcu_read_unlock();
4455 drbd_disconnected(mdev);
4456 kref_put(&mdev->kref, &drbd_minor_destroy);
4457 rcu_read_lock();
4458 }
4459 rcu_read_unlock();
4460
Philipp Reisner12038a32011-11-09 19:18:00 +01004461 if (!list_empty(&tconn->current_epoch->list))
4462 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4463 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4464 atomic_set(&tconn->current_epoch->epoch_size, 0);
Lars Ellenbergb6dd1a82011-11-28 15:04:49 +01004465 tconn->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004466
Philipp Reisner360cc742011-02-08 14:29:53 +01004467 conn_info(tconn, "Connection closed\n");
4468
Philipp Reisnercb703452011-03-24 11:03:07 +01004469 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4470 conn_try_outdate_peer_async(tconn);
4471
Philipp Reisner360cc742011-02-08 14:29:53 +01004472 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004473 oc = tconn->cstate;
4474 if (oc >= C_UNCONNECTED)
Philipp Reisner376694a2011-11-07 10:54:28 +01004475 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004476
Philipp Reisner360cc742011-02-08 14:29:53 +01004477 spin_unlock_irq(&tconn->req_lock);
4478
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004479 if (oc == C_DISCONNECTING)
Lars Ellenbergd9cc6e22011-04-27 10:25:28 +02004480 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004481}
4482
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004483static int drbd_disconnected(struct drbd_conf *mdev)
Philipp Reisner360cc742011-02-08 14:29:53 +01004484{
Philipp Reisner360cc742011-02-08 14:29:53 +01004485 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004486
Philipp Reisner85719572010-07-21 10:20:17 +02004487 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004488 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004489 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4490 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4491 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004492 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493
4494 /* We do not have data structures that would allow us to
4495 * get the rs_pending_cnt down to 0 again.
4496 * * On C_SYNC_TARGET we do not have any data structures describing
4497 * the pending RSDataRequest's we have sent.
4498 * * On C_SYNC_SOURCE there is no data structure that tracks
4499 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4500 * And no, it is not the sum of the reference counts in the
4501 * resync_LRU. The resync_LRU tracks the whole operation including
4502 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4503 * on the fly. */
4504 drbd_rs_cancel_all(mdev);
4505 mdev->rs_total = 0;
4506 mdev->rs_failed = 0;
4507 atomic_set(&mdev->rs_pending_cnt, 0);
4508 wake_up(&mdev->misc_wait);
4509
Philipp Reisnerb411b362009-09-25 16:07:19 -07004510 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004511 resync_timer_fn((unsigned long)mdev);
4512
Philipp Reisnerb411b362009-09-25 16:07:19 -07004513 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4514 * w_make_resync_request etc. which may still be on the worker queue
4515 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004516 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004517
Andreas Gruenbachera990be42011-04-06 17:56:48 +02004518 drbd_finish_peer_reqs(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004520 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
 4521	   might have queued new work. The one before drbd_finish_peer_reqs() is
 4522	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4523 drbd_flush_workqueue(mdev);
4524
Philipp Reisnerb411b362009-09-25 16:07:19 -07004525 kfree(mdev->p_uuid);
4526 mdev->p_uuid = NULL;
4527
Philipp Reisner2aebfab2011-03-28 16:48:11 +02004528 if (!drbd_suspended(mdev))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004529 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004530
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531 drbd_md_sync(mdev);
4532
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004533 /* serialize with bitmap writeout triggered by the state change,
4534 * if any. */
4535 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4536
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537 /* tcp_close and release of sendpage pages can be deferred. I don't
4538 * want to use SO_LINGER, because apparently it can be deferred for
4539 * more than 20 seconds (longest time I checked).
4540 *
4541 * Actually we don't care for exactly when the network stack does its
4542 * put_page(), but release our reference on these pages right here.
4543 */
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02004544 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004545 if (i)
4546 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004547 i = atomic_read(&mdev->pp_in_use_by_net);
4548 if (i)
4549 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004550 i = atomic_read(&mdev->pp_in_use);
4551 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004552 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004553
4554 D_ASSERT(list_empty(&mdev->read_ee));
4555 D_ASSERT(list_empty(&mdev->active_ee));
4556 D_ASSERT(list_empty(&mdev->sync_ee));
4557 D_ASSERT(list_empty(&mdev->done_ee));
4558
Philipp Reisner360cc742011-02-08 14:29:53 +01004559 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004560}
4561
4562/*
4563 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4564 * we can agree on is stored in agreed_pro_version.
4565 *
 4566 * feature flags and the reserved array should leave enough room for future
 4567 * enhancements of the handshake protocol, and for possible plugins...
 4568 *
 4569 * for now, they are expected to be zero, and are otherwise ignored.
4570 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004571static int drbd_send_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004572{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004573 struct drbd_socket *sock;
4574 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004575
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004576 sock = &tconn->data;
4577 p = conn_prepare_command(tconn, sock);
4578 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004579 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580 memset(p, 0, sizeof(*p));
4581 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4582 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004583 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004584}
4585
4586/*
4587 * return values:
4588 * 1 yes, we have a valid connection
4589 * 0 oops, did not work out, please try again
4590 * -1 peer talks different language,
4591 * no point in trying again, please go standalone.
4592 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004593static int drbd_do_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004594{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004595 /* ASSERT current == tconn->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004596 struct p_connection_features *p;
4597 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004598 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004599 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004600
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004601 err = drbd_send_features(tconn);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004602 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004603 return 0;
4604
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004605 err = drbd_recv_header(tconn, &pi);
4606 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004607 return 0;
4608
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004609 if (pi.cmd != P_CONNECTION_FEATURES) {
4610 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004611 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004612 return -1;
4613 }
4614
Philipp Reisner77351055b2011-02-07 17:24:26 +01004615 if (pi.size != expect) {
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004616 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004617 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618 return -1;
4619 }
4620
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004621 p = pi.data;
4622 err = drbd_recv_all_warn(tconn, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004623 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625
Philipp Reisnerb411b362009-09-25 16:07:19 -07004626 p->protocol_min = be32_to_cpu(p->protocol_min);
4627 p->protocol_max = be32_to_cpu(p->protocol_max);
4628 if (p->protocol_max == 0)
4629 p->protocol_max = p->protocol_min;
4630
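	/* the connection is usable only if the two advertised version ranges
	 * [protocol_min, protocol_max] overlap */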
4631 if (PRO_VERSION_MAX < p->protocol_min ||
4632 PRO_VERSION_MIN > p->protocol_max)
4633 goto incompat;
4634
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004635 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004636
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004637 conn_info(tconn, "Handshake successful: "
4638 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004639
4640 return 1;
4641
4642 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004643 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644 "I support %d-%d, peer supports %d-%d\n",
4645 PRO_VERSION_MIN, PRO_VERSION_MAX,
4646 p->protocol_min, p->protocol_max);
4647 return -1;
4648}
4649
4650#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004651static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004652{
 4653	dev_err(DEV, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4654 dev_err(DEV, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004655 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004656}
4657#else
4658#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004659
4660/* Return value:
4661 1 - auth succeeded,
4662 0 - failed, try again (network error),
4663 -1 - auth failed, don't try again.
4664*/
4665
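/* Challenge/response authentication over the data socket:
 *  1. send our random challenge as P_AUTH_CHALLENGE,
 *  2. receive the peer's challenge and HMAC it with the shared secret,
 *  3. send that digest back as P_AUTH_RESPONSE,
 *  4. receive the peer's response and compare it with the digest we
 *     compute over our own challenge (right_response).
 */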
Philipp Reisner13e60372011-02-08 09:54:40 +01004666static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004668 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004669 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4670 struct scatterlist sg;
4671 char *response = NULL;
4672 char *right_response = NULL;
4673 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004674 unsigned int key_len;
4675 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676 unsigned int resp_size;
4677 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004678 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004679 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004680 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004681
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004682 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4683
Philipp Reisner44ed1672011-04-19 17:10:19 +02004684 rcu_read_lock();
4685 nc = rcu_dereference(tconn->net_conf);
4686 key_len = strlen(nc->shared_secret);
4687 memcpy(secret, nc->shared_secret, key_len);
4688 rcu_read_unlock();
4689
Philipp Reisner13e60372011-02-08 09:54:40 +01004690 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004691 desc.flags = 0;
4692
Philipp Reisner44ed1672011-04-19 17:10:19 +02004693 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004695 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004696 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004697 goto fail;
4698 }
4699
4700 get_random_bytes(my_challenge, CHALLENGE_LEN);
4701
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004702 sock = &tconn->data;
4703 if (!conn_prepare_command(tconn, sock)) {
4704 rv = 0;
4705 goto fail;
4706 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004707 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004708 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709 if (!rv)
4710 goto fail;
4711
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004712 err = drbd_recv_header(tconn, &pi);
4713 if (err) {
4714 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004715 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004716 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717
Philipp Reisner77351055b2011-02-07 17:24:26 +01004718 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004719 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004720 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004721 rv = 0;
4722 goto fail;
4723 }
4724
Philipp Reisner77351055b2011-02-07 17:24:26 +01004725 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004726		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004727 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004728 goto fail;
4729 }
4730
Philipp Reisner77351055b2011-02-07 17:24:26 +01004731 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004733 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004734 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735 goto fail;
4736 }
4737
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004738 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4739 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004740 rv = 0;
4741 goto fail;
4742 }
4743
Philipp Reisner13e60372011-02-08 09:54:40 +01004744 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004745 response = kmalloc(resp_size, GFP_NOIO);
4746 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004747 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004748 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004749 goto fail;
4750 }
4751
4752 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004753 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004754
4755 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4756 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004757 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004758 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004759 goto fail;
4760 }
4761
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004762 if (!conn_prepare_command(tconn, sock)) {
4763 rv = 0;
4764 goto fail;
4765 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004766 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004767 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004768 if (!rv)
4769 goto fail;
4770
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004771 err = drbd_recv_header(tconn, &pi);
4772 if (err) {
4773 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004774 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004775 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004776
Philipp Reisner77351055b2011-02-07 17:24:26 +01004777 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004778 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004779 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004780 rv = 0;
4781 goto fail;
4782 }
4783
Philipp Reisner77351055b2011-02-07 17:24:26 +01004784 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004785		conn_err(tconn, "AuthResponse payload has wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004786 rv = 0;
4787 goto fail;
4788 }
4789
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004790	err = drbd_recv_all_warn(tconn, response, resp_size);
4791 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004792 rv = 0;
4793 goto fail;
4794 }
4795
4796 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004797 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004798 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004799 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004800 goto fail;
4801 }
4802
4803 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4804
4805 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4806 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004807 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004808 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004809 goto fail;
4810 }
4811
4812 rv = !memcmp(response, right_response, resp_size);
4813
4814 if (rv)
Philipp Reisner44ed1672011-04-19 17:10:19 +02004815		conn_info(tconn, "Peer authenticated using %d bytes of HMAC\n",
4816 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004817 else
4818 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004819
4820 fail:
4821 kfree(peers_ch);
4822 kfree(response);
4823 kfree(right_response);
4824
4825 return rv;
4826}
4827#endif
4828
4829int drbdd_init(struct drbd_thread *thi)
4830{
Philipp Reisner392c8802011-02-09 10:33:31 +01004831 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004832 int h;
4833
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004834 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004835
4836 do {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004837 h = conn_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004838 if (h == 0) {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004839 conn_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004840 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004841 }
4842 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004843 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004844 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004845 }
4846 } while (h == 0);
4847
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004848 if (h > 0)
4849 drbdd(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004850
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004851 conn_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004852
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004853 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004854 return 0;
4855}
4856
4857/* ********* acknowledge sender ******** */
4858
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004859static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004860{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004861 struct p_req_state_reply *p = pi->data;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004862 int retcode = be32_to_cpu(p->retcode);
4863
4864 if (retcode >= SS_SUCCESS) {
4865 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4866 } else {
4867 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4868 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4869 drbd_set_st_err_str(retcode), retcode);
4870 }
4871 wake_up(&tconn->ping_wait);
4872
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004873 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004874}
4875
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004876static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004877{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004878 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004879 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004880 int retcode = be32_to_cpu(p->retcode);
4881
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004882 mdev = vnr_to_mdev(tconn, pi->vnr);
4883 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004884 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004885
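	/* peers speaking a protocol before 100 answer connection-wide state
	 * changes with this per-volume reply; route it to the connection
	 * level handler in that case */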
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004886 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4887 D_ASSERT(tconn->agreed_pro_version < 100);
4888 return got_conn_RqSReply(tconn, pi);
4889 }
4890
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004891 if (retcode >= SS_SUCCESS) {
4892 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4893 } else {
4894 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4895 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4896 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004897 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004898 wake_up(&mdev->state_wait);
4899
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004900 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004901}
4902
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004903static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004904{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004905 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004906
4907}
4908
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004909static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004910{
4911 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004912 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4913 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4914 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004915
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004916 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004917}
4918
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004919static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004920{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004921 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004922 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004923 sector_t sector = be64_to_cpu(p->sector);
4924 int blksize = be32_to_cpu(p->blksize);
4925
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004926 mdev = vnr_to_mdev(tconn, pi->vnr);
4927 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004928 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004929
Philipp Reisner31890f42011-01-19 14:12:51 +01004930 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004931
4932 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4933
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004934 if (get_ldev(mdev)) {
4935 drbd_rs_complete_io(mdev, sector);
4936 drbd_set_in_sync(mdev, sector, blksize);
4937 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4938 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4939 put_ldev(mdev);
4940 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004941 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004942 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004943
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004944 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004945}
4946
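/* Look up the request identified by @id and @sector in @root (under the
 * req_lock), apply the request state machine event @what to it, and complete
 * the master bio if that finished the request.  Returns -EIO if the request
 * cannot be found. */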
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004947static int
4948validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4949 struct rb_root *root, const char *func,
4950 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004951{
4952 struct drbd_request *req;
4953 struct bio_and_error m;
4954
Philipp Reisner87eeee42011-01-19 14:16:30 +01004955 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004956 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004957 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004958 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004959 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004960 }
4961 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004962 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004963
4964 if (m.bio)
4965 complete_master_bio(mdev, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004966 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004967}
4968
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004969static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004970{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004971 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004972 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004973 sector_t sector = be64_to_cpu(p->sector);
4974 int blksize = be32_to_cpu(p->blksize);
4975 enum drbd_req_event what;
4976
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004977 mdev = vnr_to_mdev(tconn, pi->vnr);
4978 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004979 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004980
Philipp Reisnerb411b362009-09-25 16:07:19 -07004981 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4982
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004983 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004984 drbd_set_in_sync(mdev, sector, blksize);
4985 dec_rs_pending(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004986 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004987 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004988 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004989 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004990 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004991 break;
4992 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004993 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004994 break;
4995 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004996 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004997 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02004998 case P_SUPERSEDED:
4999 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005000 break;
5001 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005002 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005003 break;
5004 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005005 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005006 }
5007
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005008 return validate_req_change_req_state(mdev, p->block_id, sector,
5009 &mdev->write_requests, __func__,
5010 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005011}
5012
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005013static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005014{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005015 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005016 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005017 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005018 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005019 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005020
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005021 mdev = vnr_to_mdev(tconn, pi->vnr);
5022 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005023 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005024
Philipp Reisnerb411b362009-09-25 16:07:19 -07005025 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5026
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005027 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005028 dec_rs_pending(mdev);
5029 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005030 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005031 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005032
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005033 err = validate_req_change_req_state(mdev, p->block_id, sector,
5034 &mdev->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005035 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005036 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005037 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5038 The master bio might already be completed, therefore the
5039 request is no longer in the collision hash. */
5040 /* In Protocol B we might already have got a P_RECV_ACK
5041 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005042 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005043 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005044 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005045}
5046
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005047static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005048{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005049 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005050 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005051 sector_t sector = be64_to_cpu(p->sector);
5052
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005053 mdev = vnr_to_mdev(tconn, pi->vnr);
5054 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005055 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005056
Philipp Reisnerb411b362009-09-25 16:07:19 -07005057 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005058
Philipp Reisner380207d2011-11-11 12:31:20 +01005059 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005060 (unsigned long long)sector, be32_to_cpu(p->blksize));
5061
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005062 return validate_req_change_req_state(mdev, p->block_id, sector,
5063 &mdev->read_requests, __func__,
5064 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005065}
5066
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005067static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005068{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005069 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005070 sector_t sector;
5071 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005072 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005073
5074 mdev = vnr_to_mdev(tconn, pi->vnr);
5075 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005076 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005077
5078 sector = be64_to_cpu(p->sector);
5079 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005080
5081 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5082
5083 dec_rs_pending(mdev);
5084
5085 if (get_ldev_if_state(mdev, D_FAILED)) {
5086 drbd_rs_complete_io(mdev, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005087 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005088 case P_NEG_RS_DREPLY:
5089 drbd_rs_failed_io(mdev, sector, size);
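			/* fall through */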
5090 case P_RS_CANCEL:
5091 break;
5092 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005093 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005094 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005095 put_ldev(mdev);
5096 }
5097
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005098 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005099}
5100
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005101static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005102{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005103 struct p_barrier_ack *p = pi->data;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005104 struct drbd_conf *mdev;
5105 int vnr;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005106
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005107 tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005108
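	/* the peer acknowledged the barrier: devices that went Ahead and have
	 * no application writes in flight any more can schedule the switch
	 * back to resync via start_resync_timer */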
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005109 rcu_read_lock();
5110 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5111 if (mdev->state.conn == C_AHEAD &&
5112 atomic_read(&mdev->ap_in_flight) == 0 &&
5113 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
5114 mdev->start_resync_timer.expires = jiffies + HZ;
5115 add_timer(&mdev->start_resync_timer);
5116 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005117 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005118 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005119
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005120 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005121}
5122
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005123static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005124{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005125 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005126 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005127 struct drbd_work *w;
5128 sector_t sector;
5129 int size;
5130
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005131 mdev = vnr_to_mdev(tconn, pi->vnr);
5132 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005133 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005134
Philipp Reisnerb411b362009-09-25 16:07:19 -07005135 sector = be64_to_cpu(p->sector);
5136 size = be32_to_cpu(p->blksize);
5137
5138 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5139
5140 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005141 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005143 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005144
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005145 if (!get_ldev(mdev))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005146 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005147
Philipp Reisnerb411b362009-09-25 16:07:19 -07005148 drbd_rs_complete_io(mdev, sector);
5149 dec_rs_pending(mdev);
5150
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005151 --mdev->ov_left;
5152
5153 /* let's advance progress step marks only for every other megabyte */
5154 if ((mdev->ov_left & 0x200) == 0x200)
5155 drbd_advance_rs_marks(mdev, mdev->ov_left);
5156
5157 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005158 w = kmalloc(sizeof(*w), GFP_NOIO);
5159 if (w) {
5160 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01005161 w->mdev = mdev;
Lars Ellenbergd5b27b02011-11-14 15:42:37 +01005162 drbd_queue_work(&mdev->tconn->sender_work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005163 } else {
 5164			dev_err(DEV, "kmalloc(w) failed.\n");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005165 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005166 drbd_resync_finished(mdev);
5167 }
5168 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005169 put_ldev(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005170 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005171}
5172
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005173static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005174{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005175 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005176}
5177
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005178static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005179{
Philipp Reisner082a3432011-03-15 16:05:42 +01005180 struct drbd_conf *mdev;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005181 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005182
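	/* process the done_ee list of every volume; repeat the pass as long as
	 * any done_ee list turns out non-empty again after re-enabling
	 * SIGNAL_ASENDER, so nothing queued concurrently is missed */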
5183 do {
5184 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5185 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005186
5187 rcu_read_lock();
5188 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5189 kref_get(&mdev->kref);
5190 rcu_read_unlock();
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005191 if (drbd_finish_peer_reqs(mdev)) {
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005192 kref_put(&mdev->kref, &drbd_minor_destroy);
5193 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005194 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005195 kref_put(&mdev->kref, &drbd_minor_destroy);
5196 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005197 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005198 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005199
5200 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005201 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner082a3432011-03-15 16:05:42 +01005202 not_empty = !list_empty(&mdev->done_ee);
5203 if (not_empty)
5204 break;
5205 }
5206 spin_unlock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005207 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005208 } while (not_empty);
5209
5210 return 0;
5211}
5212
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005213struct asender_cmd {
5214 size_t pkt_size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005215 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005216};
5217
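/* Dispatch table for the meta (ack) socket: indexed by packet command, each
 * entry gives the fixed payload size and the handler to call.  Commands not
 * listed here (or with a NULL handler) make drbd_asender() disconnect. */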
5218static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005219 [P_PING] = { 0, got_Ping },
5220 [P_PING_ACK] = { 0, got_PingAck },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005221 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5222 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5223 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005224 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005225 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5226 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
5227 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
5228 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5229 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5230 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5231 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
5232 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
5233 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5234 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5235 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005236};
5237
Philipp Reisnerb411b362009-09-25 16:07:19 -07005238int drbd_asender(struct drbd_thread *thi)
5239{
Philipp Reisner392c8802011-02-09 10:33:31 +01005240 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005241 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005242 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005243 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005244 void *buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005245 int received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005246 unsigned int header_size = drbd_header_size(tconn);
5247 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005248 bool ping_timeout_active = false;
5249 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005250 int ping_timeo, tcp_cork, ping_int;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005251
Philipp Reisnerb411b362009-09-25 16:07:19 -07005252 current->policy = SCHED_RR; /* Make this a realtime task! */
5253 current->rt_priority = 2; /* more important than all other tasks */
5254
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005255 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005256 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005257
5258 rcu_read_lock();
5259 nc = rcu_dereference(tconn->net_conf);
5260 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005261 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005262 ping_int = nc->ping_int;
5263 rcu_read_unlock();
5264
Philipp Reisner32862ec2011-02-08 16:41:01 +01005265 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Andreas Gruenbachera17647a2011-04-01 12:49:42 +02005266 if (drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005267 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005268 goto reconnect;
5269 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02005270 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
5271 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005272 }
5273
Philipp Reisner32862ec2011-02-08 16:41:01 +01005274 /* TODO: conditionally cork; it may hurt latency if we cork without
5275 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005276 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005277 drbd_tcp_cork(tconn->meta.socket);
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005278 if (tconn_finish_peer_reqs(tconn)) {
5279 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005280 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01005281 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005282 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005283 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005284 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005285
5286 /* short circuit, recv_msg would return EINTR anyways. */
5287 if (signal_pending(current))
5288 continue;
5289
Philipp Reisner32862ec2011-02-08 16:41:01 +01005290 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
5291 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005292
5293 flush_signals(current);
5294
5295 /* Note:
5296 * -EINTR (on meta) we got a signal
5297 * -EAGAIN (on meta) rcvtimeo expired
5298 * -ECONNRESET other side closed the connection
5299 * -ERESTARTSYS (on data) we got a signal
5300 * rv < 0 other than above: unexpected error!
5301 * rv == expected: full header or command
5302 * rv < expected: "woken" by signal during receive
5303 * rv == 0 : "connection shut down by peer"
5304 */
5305 if (likely(rv > 0)) {
5306 received += rv;
5307 buf += rv;
5308 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005309 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005310 goto reconnect;
5311 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005312 /* If the data socket received something meanwhile,
5313 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01005314 if (time_after(tconn->last_received,
5315 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005316 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005317 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005318 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005319 goto reconnect;
5320 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005321 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005322 continue;
5323 } else if (rv == -EINTR) {
5324 continue;
5325 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005326 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005327 goto reconnect;
5328 }
5329
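		/* a complete header is in the buffer: decode it, look up the
		 * handler in asender_tbl and extend 'expect' by the payload
		 * size, so we keep receiving until the whole packet is in */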
5330 if (received == expect && cmd == NULL) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005331 if (decode_header(tconn, tconn->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005332 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005333 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005334 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005335 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
5336 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005337 goto disconnect;
5338 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005339 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005340 if (pi.size != expect - header_size) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005341 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005342 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005343 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005344 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005345 }
5346 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005347 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005348
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005349 err = cmd->fn(tconn, &pi);
5350 if (err) {
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005351 conn_err(tconn, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005352 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005353 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005354
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005355 tconn->last_received = jiffies;
5356
Philipp Reisner44ed1672011-04-19 17:10:19 +02005357 if (cmd == &asender_tbl[P_PING_ACK]) {
5358 /* restore idle timeout */
5359 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
5360 ping_timeout_active = false;
5361 }
Lars Ellenbergf36af182011-03-09 22:44:55 +01005362
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005363 buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005364 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005365 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005366 cmd = NULL;
5367 }
5368 }
5369
5370 if (0) {
5371reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005372 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005373 }
5374 if (0) {
5375disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005376 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005377 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005378 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005379
Philipp Reisner32862ec2011-02-08 16:41:01 +01005380 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005381
5382 return 0;
5383}