/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_tconn *tconn);
static int drbd_do_auth(struct drbd_tconn *tconn);
static int drbd_disconnected(struct drbd_conf *mdev);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

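/* Drop a reference on each page of the chain; returns the number of pages
 * that were put. */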
static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *__drbd_alloc_pages(struct drbd_conf *mdev,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_conf *mdev,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req;
	struct list_head *le, *tle;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that is not finished,
	   we can stop examining the list... */

	list_for_each_safe(le, tle, &mdev->net_ee) {
		peer_req = list_entry(le, struct drbd_peer_request, w.list);
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(le, to_be_freed);
	}
}

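/* Collect the already-finished entries of net_ee under the req_lock, then
 * free them outside the lock. */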
static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @mdev:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_conf *mdev, unsigned int number,
			      bool retry)
{
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(mdev->tconn->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&mdev->pp_in_use) < mxb)
		page = __drbd_alloc_pages(mdev, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(mdev);

		if (atomic_read(&mdev->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(mdev, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			dev_warn(DEV, "drbd_alloc_pages interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &mdev->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&mdev->tconn->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_conf *mdev, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use;
	int i;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_conf *mdev, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_peer_request *peer_req;
	struct page *page;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(mdev, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			dev_err(DEV, "%s: allocation failed\n", __func__);
		return NULL;
	}

	page = drbd_alloc_pages(mdev, nr_pages, (gfp_mask & __GFP_WAIT));
	if (!page)
		goto fail;

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->w.mdev = mdev;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_conf *mdev, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(mdev, peer_req->pages, is_net);
	D_ASSERT(atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_conf *mdev, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &mdev->net_ee;

	spin_lock_irq(&mdev->tconn->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(mdev, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_conf *mdev)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&mdev->tconn->req_lock);
	reclaim_finished_net_peer_reqs(mdev, &reclaimed);
	list_splice_init(&mdev->done_ee, &work_list);
	spin_unlock_irq(&mdev->tconn->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(mdev, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_discard_write.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(mdev, peer_req);
	}
	wake_up(&mdev->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&mdev->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&mdev->tconn->req_lock);
		io_schedule();
		finish_wait(&mdev->ee_wait, &wait);
		spin_lock_irq(&mdev->tconn->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_conf *mdev,
				    struct list_head *head)
{
	spin_lock_irq(&mdev->tconn->req_lock);
	_drbd_wait_ee_list_empty(mdev, head);
	spin_unlock_irq(&mdev->tconn->req_lock);
}

/* see also kernel_accept; which is only present since 2.6.18.
 * also we want to log which part of it failed, exactly */
static int drbd_accept(const char **what, struct socket *sock, struct socket **newsock)
{
	struct sock *sk = sock->sk;
	int err = 0;

	*what = "listen";
	err = sock->ops->listen(sock, 5);
	if (err < 0)
		goto out;

	*what = "sock_create_lite";
	err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol,
			       newsock);
	if (err < 0)
		goto out;

	*what = "accept";
	err = sock->ops->accept(sock, *newsock, 0);
	if (err < 0) {
		sock_release(*newsock);
		*newsock = NULL;
		goto out;
	}
	(*newsock)->ops = sock->ops;
	__module_get((*newsock)->ops->owner);

out:
	return err;
}

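/* Plain one-shot recvmsg on the given socket; with no flags it waits for the
 * full @size (MSG_WAITALL | MSG_NOSIGNAL). */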
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);
	rv = sock_recvmsg(sock, &msg, size, msg.msg_flags);
	set_fs(oldfs);

	return rv;
}

static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size)
{
	mm_segment_t oldfs;
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_iovlen = 1,
		.msg_iov = (struct iovec *)&iov,
		.msg_flags = MSG_WAITALL | MSG_NOSIGNAL
	};
	int rv;

	oldfs = get_fs();
	set_fs(KERNEL_DS);

	for (;;) {
		rv = sock_recvmsg(tconn->data.socket, &msg, size, msg.msg_flags);
		if (rv == size)
			break;

		/* Note:
		 * ECONNRESET	other side closed the connection
		 * ERESTARTSYS	(on sock) we got a signal
		 */

		if (rv < 0) {
			if (rv == -ECONNRESET)
				conn_info(tconn, "sock was reset by peer\n");
			else if (rv != -ERESTARTSYS)
				conn_err(tconn, "sock_recvmsg returned %d\n", rv);
			break;
		} else if (rv == 0) {
			conn_info(tconn, "sock was shut down by peer\n");
			break;
		} else {
			/* signal came in, or peer/link went down,
			 * after we read a partial message
			 */
			/* D_ASSERT(signal_pending(current)); */
			break;
		}
	}

	set_fs(oldfs);

	if (rv != size)
		conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD);

	return rv;
}

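/* Like drbd_recv(), but anything short of the requested size is turned into
 * an error: returns 0 on a complete read, a negative error code otherwise. */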
static int drbd_recv_all(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv(tconn, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_tconn *tconn, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(tconn, buf, size);
	if (err && !signal_pending(current))
		conn_warn(tconn, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
		unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_tconn *tconn)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &tconn->my_addr, my_addr_len);

	if (((struct sockaddr *)&tconn->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, tconn->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &tconn->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			conn_err(tconn, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn)
{
	int timeo, err, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	struct socket *s_estab = NULL, *s_listen;
	struct sockaddr_in6 my_addr;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, tconn->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &tconn->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	timeo = connect_int * HZ;
	timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */

	s_listen->sk->sk_reuse    = 1; /* SO_REUSEADDR */
	s_listen->sk->sk_rcvtimeo = timeo;
	s_listen->sk->sk_sndtimeo = timeo;
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	err = drbd_accept(&what, s_listen, &s_estab);

out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			conn_err(tconn, "%s failed, err = %d\n", what, err);
			conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return s_estab;
}

static int decode_header(struct drbd_tconn *, void *, struct packet_info *);

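/* During the connect handshake each side announces the intended role of a
 * freshly established socket by sending an empty P_INITIAL_DATA or
 * P_INITIAL_META packet as its very first packet. */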
static int send_first_packet(struct drbd_tconn *tconn, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(tconn, sock))
		return -EIO;
	return conn_send_command(tconn, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_tconn *tconn, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(tconn);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, tconn->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(tconn, tconn->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_conf *mdev)
{
	int err;

	atomic_set(&mdev->packet_seq, 0);
	mdev->peer_seq = 0;

	mdev->state_mutex = mdev->tconn->agreed_pro_version < 100 ?
		&mdev->tconn->cstate_mutex :
		&mdev->own_state_mutex;

	err = drbd_send_sync_param(mdev);
	if (!err)
		err = drbd_send_sizes(mdev, 0, 0);
	if (!err)
		err = drbd_send_uuids(mdev);
	if (!err)
		err = drbd_send_current_state(mdev);
	clear_bit(USE_DEGR_WFC_T, &mdev->flags);
	clear_bit(RESIZE_PENDING, &mdev->flags);
	mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_tconn *tconn)
{
	struct drbd_socket sock, msock;
	struct drbd_conf *mdev;
	struct net_conf *nc;
	int vnr, timeout, try, h, ok;
	bool discard_my_data;

	if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = tconn->data.sbuf;
	sock.rbuf = tconn->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = tconn->meta.sbuf;
	msock.rbuf = tconn->meta.rbuf;
	msock.socket = NULL;

	clear_bit(DISCARD_CONCURRENT, &tconn->flags);

	/* Assume that the peer only understands protocol 80 until we know better. */
	tconn->agreed_pro_version = 80;

	do {
		struct socket *s;

		for (try = 0;;) {
			/* 3 tries, this should take less than a second! */
			s = drbd_try_connect(tconn);
			if (s || ++try >= 3)
				break;
			/* give the other side time to call bind() & listen() */
			schedule_timeout_interruptible(HZ / 10);
		}

		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(tconn, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				msock.socket = s;
				send_first_packet(tconn, &msock, P_INITIAL_META);
			} else {
				conn_err(tconn, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(tconn->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(tconn);
		if (s) {
			try = receive_first_packet(tconn, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (try) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					conn_warn(tconn, "initial packet S crossed\n");
					sock_release(sock.socket);
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				if (msock.socket) {
					conn_warn(tconn, "initial packet M crossed\n");
					sock_release(msock.socket);
				}
				msock.socket = s;
				set_bit(DISCARD_CONCURRENT, &tconn->flags);
				break;
			default:
				conn_warn(tconn, "Error receiving initial packet\n");
				sock_release(s);
				if (random32() & 1)
					goto retry;
			}
		}

		if (tconn->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&tconn->receiver) == EXITING)
				goto out_release_sockets;
		}

		if (sock.socket && msock.socket) {
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}
	} while (1);

	sock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = 1; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = tconn->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(tconn->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	tconn->data.socket = sock.socket;
	tconn->meta.socket = msock.socket;
	tconn->last_received = jiffies;

	h = drbd_do_features(tconn);
	if (h <= 0)
		return h;

	if (tconn->cram_hmac_tfm) {
		/* drbd_request_state(mdev, NS(conn, WFAuth)); */
		switch (drbd_do_auth(tconn)) {
		case -1:
			conn_err(tconn, "Authentication of peer failed\n");
			return -1;
		case 0:
			conn_err(tconn, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	tconn->data.socket->sk->sk_sndtimeo = timeout;
	tconn->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(tconn) == -EOPNOTSUPP)
		return -1;

	rcu_read_lock();
	idr_for_each_entry(&tconn->volumes, mdev, vnr) {
		kref_get(&mdev->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &mdev->flags);
		else
			clear_bit(DISCARD_MY_DATA, &mdev->flags);

		drbd_connected(mdev);
		kref_put(&mdev->kref, &drbd_minor_destroy);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (conn_request_state(tconn, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE) < SS_SUCCESS)
		return 0;

	drbd_thread_start(&tconn->asender);

	mutex_lock(&tconn->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	tconn->net_conf->discard_my_data = 0;
	mutex_unlock(&tconn->conf_update);

	return h;

out_release_sockets:
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

static int decode_header(struct drbd_tconn *tconn, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(tconn);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			conn_err(tconn, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		conn_err(tconn, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 tconn->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

static int drbd_recv_header(struct drbd_tconn *tconn, struct packet_info *pi)
{
	void *buffer = tconn->data.rbuf;
	int err;

	err = drbd_recv_all_warn(tconn, buffer, drbd_header_size(tconn));
	if (err)
		return err;

	err = decode_header(tconn, buffer, pi);
	tconn->last_received = jiffies;

	return err;
}

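/* Flush the backing devices of all volumes of this connection if the current
 * write ordering method requires it; on a flush failure, degrade the write
 * ordering to WO_drain_io. */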
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001101static void drbd_flush(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001102{
1103 int rv;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001104 struct drbd_conf *mdev;
1105 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001106
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001107 if (tconn->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001108 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001109 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001110 if (!get_ldev(mdev))
1111 continue;
1112 kref_get(&mdev->kref);
1113 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001114
Lars Ellenberg615e0872011-11-17 14:32:12 +01001115 rv = blkdev_issue_flush(mdev->ldev->backing_bdev,
1116 GFP_NOIO, NULL);
1117 if (rv) {
1118 dev_info(DEV, "local disk flush failed with status %d\n", rv);
1119 /* would rather check on EOPNOTSUPP, but that is not reliable.
1120 * don't try again for ANY return value != 0
1121 * if (rv == -EOPNOTSUPP) */
1122 drbd_bump_write_ordering(tconn, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001123 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001124 put_ldev(mdev);
1125 kref_put(&mdev->kref, &drbd_minor_destroy);
1126
1127 rcu_read_lock();
1128 if (rv)
1129 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001131 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001132 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001133}
1134
1135/**
1136 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
1137 * @mdev: DRBD device.
1138 * @epoch: Epoch object.
1139 * @ev: Epoch event.
1140 */
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001141static enum finish_epoch drbd_may_finish_epoch(struct drbd_tconn *tconn,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001142 struct drbd_epoch *epoch,
1143 enum epoch_event ev)
1144{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001145 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001147 enum finish_epoch rv = FE_STILL_LIVE;
1148
Philipp Reisner12038a32011-11-09 19:18:00 +01001149 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001150 do {
1151 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001152
1153 epoch_size = atomic_read(&epoch->epoch_size);
1154
1155 switch (ev & ~EV_CLEANUP) {
1156 case EV_PUT:
1157 atomic_dec(&epoch->active);
1158 break;
1159 case EV_GOT_BARRIER_NR:
1160 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001161 break;
1162 case EV_BECAME_LAST:
1163 /* nothing to do*/
1164 break;
1165 }
1166
Philipp Reisnerb411b362009-09-25 16:07:19 -07001167 if (epoch_size != 0 &&
1168 atomic_read(&epoch->active) == 0 &&
Philipp Reisner85d735132011-07-18 15:45:15 +02001169 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170 if (!(ev & EV_CLEANUP)) {
Philipp Reisner12038a32011-11-09 19:18:00 +01001171 spin_unlock(&tconn->epoch_lock);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001172 drbd_send_b_ack(epoch->tconn, epoch->barrier_nr, epoch_size);
Philipp Reisner12038a32011-11-09 19:18:00 +01001173 spin_lock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001174 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001175#if 0
1176 /* FIXME: dec unacked on connection, once we have
1177 * something to count pending connection packets in. */
Philipp Reisner85d735132011-07-18 15:45:15 +02001178 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001179 dec_unacked(epoch->tconn);
1180#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181
Philipp Reisner12038a32011-11-09 19:18:00 +01001182 if (tconn->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1184 list_del(&epoch->list);
1185 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Philipp Reisner12038a32011-11-09 19:18:00 +01001186 tconn->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 kfree(epoch);
1188
1189 if (rv == FE_STILL_LIVE)
1190 rv = FE_DESTROYED;
1191 } else {
1192 epoch->flags = 0;
1193 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001194 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 if (rv == FE_STILL_LIVE)
1196 rv = FE_RECYCLED;
1197 }
1198 }
1199
1200 if (!next_epoch)
1201 break;
1202
1203 epoch = next_epoch;
1204 } while (1);
1205
Philipp Reisner12038a32011-11-09 19:18:00 +01001206 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001207
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 return rv;
1209}
1210
1211/**
 1212 * drbd_bump_write_ordering() - Fall back to another write ordering method
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001213 * @tconn: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001214 * @wo: Write ordering method to try.
1215 */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001216void drbd_bump_write_ordering(struct drbd_tconn *tconn, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001217{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001218 struct disk_conf *dc;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001219 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001220 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001221 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 static char *write_ordering_str[] = {
1223 [WO_none] = "none",
1224 [WO_drain_io] = "drain",
1225 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001226 };
1227
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001228 pwo = tconn->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001230 rcu_read_lock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001231 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner27eb13e2012-03-30 14:12:15 +02001232 if (!get_ldev_if_state(mdev, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001233 continue;
1234 dc = rcu_dereference(mdev->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001235
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001236 if (wo == WO_bdev_flush && !dc->disk_flushes)
1237 wo = WO_drain_io;
1238 if (wo == WO_drain_io && !dc->disk_drain)
1239 wo = WO_none;
1240 put_ldev(mdev);
1241 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001242 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001243 tconn->write_ordering = wo;
1244 if (pwo != tconn->write_ordering || wo == WO_bdev_flush)
1245 conn_info(tconn, "Method to ensure write ordering: %s\n", write_ordering_str[tconn->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246}
1247
1248/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001249 * drbd_submit_peer_request() - submit the peer request to the local backing device
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001250 * @mdev: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001251 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001252 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001253 *
1254 * May spread the pages to multiple bios,
1255 * depending on bio_add_page restrictions.
1256 *
1257 * Returns 0 if all bios have been submitted,
1258 * -ENOMEM if we could not allocate enough bios,
1259 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1260 * single page to an empty bio (which should never happen and likely indicates
1261 * that the lower level IO stack is in some way broken). This has been observed
1262 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001263 */
1264/* TODO allocate from our own bio_set. */
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001265int drbd_submit_peer_request(struct drbd_conf *mdev,
1266 struct drbd_peer_request *peer_req,
1267 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001268{
1269 struct bio *bios = NULL;
1270 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001271 struct page *page = peer_req->pages;
1272 sector_t sector = peer_req->i.sector;
1273 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001274 unsigned n_bios = 0;
1275 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001276 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001277
1278 /* In most cases, we will only need one bio. But in case the lower
1279 * level restrictions happen to be different at this offset on this
1280 * side than those of the sending peer, we may need to submit the
Lars Ellenbergda4a75d2011-02-23 17:02:01 +01001281 * request in more than one bio.
1282 *
1283 * Plain bio_alloc is good enough here, this is no DRBD internally
1284 * generated bio, but a bio allocated on behalf of the peer.
1285 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001286next_bio:
1287 bio = bio_alloc(GFP_NOIO, nr_pages);
1288 if (!bio) {
1289 dev_err(DEV, "submit_ee: Allocation of a bio failed\n");
1290 goto fail;
1291 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001292 /* > peer_req->i.sector, unless this is the first bio */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001293 bio->bi_sector = sector;
1294 bio->bi_bdev = mdev->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001295 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001296 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001297 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001298
1299 bio->bi_next = bios;
1300 bios = bio;
1301 ++n_bios;
1302
1303 page_chain_for_each(page) {
1304 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1305 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001306 /* A single page must always be possible!
1307 * But in case it fails anyways,
1308 * we deal with it, and complain (below). */
1309 if (bio->bi_vcnt == 0) {
1310 dev_err(DEV,
1311 "bio_add_page failed for len=%u, "
1312 "bi_vcnt=0 (bi_sector=%llu)\n",
1313 len, (unsigned long long)bio->bi_sector);
1314 err = -ENOSPC;
1315 goto fail;
1316 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001317 goto next_bio;
1318 }
1319 ds -= len;
1320 sector += len >> 9;
1321 --nr_pages;
1322 }
1323 D_ASSERT(page == NULL);
1324 D_ASSERT(ds == 0);
1325
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001326 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001327 do {
1328 bio = bios;
1329 bios = bios->bi_next;
1330 bio->bi_next = NULL;
1331
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001332 drbd_generic_make_request(mdev, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001333 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001334 return 0;
1335
1336fail:
1337 while (bios) {
1338 bio = bios;
1339 bios = bios->bi_next;
1340 bio_put(bio);
1341 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001342 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001343}
1344
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001345static void drbd_remove_epoch_entry_interval(struct drbd_conf *mdev,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001346 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001347{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001348 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001349
1350 drbd_remove_interval(&mdev->write_requests, i);
1351 drbd_clear_interval(i);
1352
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001353 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001354 if (i->waiting)
1355 wake_up(&mdev->misc_wait);
1356}
1357
Philipp Reisner77fede52011-11-10 21:19:11 +01001358void conn_wait_active_ee_empty(struct drbd_tconn *tconn)
1359{
1360 struct drbd_conf *mdev;
1361 int vnr;
1362
1363 rcu_read_lock();
1364 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
1365 kref_get(&mdev->kref);
1366 rcu_read_unlock();
1367 drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
1368 kref_put(&mdev->kref, &drbd_minor_destroy);
1369 rcu_read_lock();
1370 }
1371 rcu_read_unlock();
1372}
1373
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001374static int receive_Barrier(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001375{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001376 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001377 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378 struct drbd_epoch *epoch;
1379
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001380 /* FIXME these are unacked on connection,
1381 * not a specific (peer)device.
1382 */
Philipp Reisner12038a32011-11-09 19:18:00 +01001383 tconn->current_epoch->barrier_nr = p->barrier;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001384 tconn->current_epoch->tconn = tconn;
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001385 rv = drbd_may_finish_epoch(tconn, tconn->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001386
1387 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1388 * the activity log, which means it would not be resynced in case the
1389 * R_PRIMARY crashes now.
1390 * Therefore we must send the barrier_ack after the barrier request was
1391 * completed. */
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001392 switch (tconn->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001393 case WO_none:
1394 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001395 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001396
1397 /* receiver context, in the writeout path of the other node.
1398 * avoid potential distributed deadlock */
1399 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1400 if (epoch)
1401 break;
1402 else
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001403 conn_warn(tconn, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001404 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001405
1406 case WO_bdev_flush:
1407 case WO_drain_io:
Philipp Reisner77fede52011-11-10 21:19:11 +01001408 conn_wait_active_ee_empty(tconn);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001409 drbd_flush(tconn);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001410
Philipp Reisner12038a32011-11-09 19:18:00 +01001411 if (atomic_read(&tconn->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001412 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1413 if (epoch)
1414 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001415 }
1416
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001417 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001418 default:
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001419 conn_err(tconn, "Strangeness in tconn->write_ordering %d\n", tconn->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001420 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001421 }
1422
1423 epoch->flags = 0;
1424 atomic_set(&epoch->epoch_size, 0);
1425 atomic_set(&epoch->active, 0);
1426
Philipp Reisner12038a32011-11-09 19:18:00 +01001427 spin_lock(&tconn->epoch_lock);
1428 if (atomic_read(&tconn->current_epoch->epoch_size)) {
1429 list_add(&epoch->list, &tconn->current_epoch->list);
1430 tconn->current_epoch = epoch;
1431 tconn->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001432 } else {
1433 /* The current_epoch got recycled while we allocated this one... */
1434 kfree(epoch);
1435 }
Philipp Reisner12038a32011-11-09 19:18:00 +01001436 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001438 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001439}
1440
1441/* used from receive_RSDataReply (recv_resync_read)
1442 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001443static struct drbd_peer_request *
1444read_in_block(struct drbd_conf *mdev, u64 id, sector_t sector,
1445 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001446{
Lars Ellenberg66660322010-04-06 12:15:04 +02001447 const sector_t capacity = drbd_get_capacity(mdev->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001448 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001449 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001450 int dgs, ds, err;
Philipp Reisnera0638452011-01-19 14:31:32 +01001451 void *dig_in = mdev->tconn->int_dig_in;
1452 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001453 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001454
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001455 dgs = 0;
1456 if (mdev->tconn->peer_integrity_tfm) {
1457 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001458 /*
1459 * FIXME: Receive the incoming digest into the receive buffer
1460 * here, together with its struct p_data?
1461 */
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001462 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1463 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001464 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001465 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 }
1467
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001468 if (!expect(data_size != 0))
1469 return NULL;
1470 if (!expect(IS_ALIGNED(data_size, 512)))
1471 return NULL;
1472 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1473 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001474
Lars Ellenberg66660322010-04-06 12:15:04 +02001475	/* even though we trust our peer,
1476 * we sometimes have to double check. */
1477 if (sector + (data_size>>9) > capacity) {
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001478 dev_err(DEV, "request from peer beyond end of local disk: "
1479 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001480 (unsigned long long)capacity,
1481 (unsigned long long)sector, data_size);
1482 return NULL;
1483 }
1484
Philipp Reisnerb411b362009-09-25 16:07:19 -07001485 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1486 * "criss-cross" setup, that might cause write-out on some other DRBD,
1487 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02001488 peer_req = drbd_alloc_peer_req(mdev, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001489 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001491
Philipp Reisnerb411b362009-09-25 16:07:19 -07001492 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001493 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001494 page_chain_for_each(page) {
1495 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001496 data = kmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001497 err = drbd_recv_all_warn(mdev->tconn, data, len);
Andreas Gruenbacher0cf9d272010-12-07 10:43:29 +01001498 if (drbd_insert_fault(mdev, DRBD_FAULT_RECEIVE)) {
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001499 dev_err(DEV, "Fault injection: Corrupting data on receive\n");
1500 data[0] = data[0] ^ (unsigned long)-1;
1501 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001502 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001503 if (err) {
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001504 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001505 return NULL;
1506 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001507 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001508 }
1509
1510 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001511 drbd_csum_ee(mdev, mdev->tconn->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001512 if (memcmp(dig_in, dig_vv, dgs)) {
Lars Ellenberg470be442010-11-10 10:36:52 +01001513 dev_err(DEV, "Digest integrity check FAILED: %llus +%u\n",
1514 (unsigned long long)sector, data_size);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001515 drbd_free_peer_req(mdev, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516 return NULL;
1517 }
1518 }
1519 mdev->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001520 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521}
1522
1523/* drbd_drain_block() just takes a data block
1524 * out of the socket input buffer, and discards it.
1525 */
1526static int drbd_drain_block(struct drbd_conf *mdev, int data_size)
1527{
1528 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001529 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530 void *data;
1531
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001532 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001533 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001534
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +02001535 page = drbd_alloc_pages(mdev, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536
1537 data = kmap(page);
1538 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001539 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1540
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001541 err = drbd_recv_all_warn(mdev->tconn, data, len);
1542 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001543 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001544 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001545 }
1546 kunmap(page);
Andreas Gruenbacher5cc287e2011-04-07 21:02:59 +02001547 drbd_free_pages(mdev, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001548 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549}
1550
1551static int recv_dless_read(struct drbd_conf *mdev, struct drbd_request *req,
1552 sector_t sector, int data_size)
1553{
1554 struct bio_vec *bvec;
1555 struct bio *bio;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001556 int dgs, err, i, expect;
Philipp Reisnera0638452011-01-19 14:31:32 +01001557 void *dig_in = mdev->tconn->int_dig_in;
1558 void *dig_vv = mdev->tconn->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001559
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001560 dgs = 0;
1561 if (mdev->tconn->peer_integrity_tfm) {
1562 dgs = crypto_hash_digestsize(mdev->tconn->peer_integrity_tfm);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001563 err = drbd_recv_all_warn(mdev->tconn, dig_in, dgs);
1564 if (err)
1565 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001566 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001567 }
1568
Philipp Reisnerb411b362009-09-25 16:07:19 -07001569 /* optimistically update recv_cnt. if receiving fails below,
1570 * we disconnect anyways, and counters will be reset. */
1571 mdev->recv_cnt += data_size>>9;
1572
1573 bio = req->master_bio;
1574 D_ASSERT(sector == bio->bi_sector);
1575
1576 bio_for_each_segment(bvec, bio, i) {
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001577 void *mapped = kmap(bvec->bv_page) + bvec->bv_offset;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001578 expect = min_t(int, data_size, bvec->bv_len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001579 err = drbd_recv_all_warn(mdev->tconn, mapped, expect);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580 kunmap(bvec->bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001581 if (err)
1582 return err;
1583 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584 }
1585
1586 if (dgs) {
Andreas Gruenbacher5b614ab2011-04-27 21:00:12 +02001587 drbd_csum_bio(mdev, mdev->tconn->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001588 if (memcmp(dig_in, dig_vv, dgs)) {
1589 dev_err(DEV, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001590 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 }
1592 }
1593
1594 D_ASSERT(data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001595 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596}
1597
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001598/*
1599 * e_end_resync_block() is called in asender context via
1600 * drbd_finish_peer_reqs().
1601 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001602static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001604 struct drbd_peer_request *peer_req =
1605 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001606 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001607 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001608 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001610 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001611
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001612 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
1613 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001614 err = drbd_send_ack(mdev, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001615 } else {
1616 /* Record failure to sync */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001617 drbd_rs_failed_io(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001619 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620 }
1621 dec_unacked(mdev);
1622
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001623 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001624}
1625
1626static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_size) __releases(local)
1627{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001628 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001629
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001630 peer_req = read_in_block(mdev, ID_SYNCER, sector, data_size);
1631 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001632 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001633
1634 dec_rs_pending(mdev);
1635
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636 inc_unacked(mdev);
1637 /* corresponding dec_unacked() in e_end_resync_block()
1638 * respective _drbd_clear_done_ee */
1639
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001640 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001641
Philipp Reisner87eeee42011-01-19 14:16:30 +01001642 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001643 list_add(&peer_req->w.list, &mdev->sync_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001644 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02001646 atomic_add(data_size >> 9, &mdev->rs_sect_ev);
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001647 if (drbd_submit_peer_request(mdev, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001648 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001650 /* don't care for the reason here */
1651 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01001652 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001653 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001654 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001655
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02001656 drbd_free_peer_req(mdev, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001657fail:
1658 put_ldev(mdev);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001659 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660}
1661
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001662static struct drbd_request *
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001663find_request(struct drbd_conf *mdev, struct rb_root *root, u64 id,
1664 sector_t sector, bool missing_ok, const char *func)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001665{
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001666 struct drbd_request *req;
1667
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001668 /* Request object according to our peer */
1669 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001670 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001671 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001672 if (!missing_ok) {
Andreas Gruenbacher5af172e2011-07-15 09:43:23 +02001673 dev_err(DEV, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001674 (unsigned long)id, (unsigned long long)sector);
1675 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001676 return NULL;
1677}
1678
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001679static int receive_DataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001681 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682 struct drbd_request *req;
1683 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001684 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001685 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001686
1687 mdev = vnr_to_mdev(tconn, pi->vnr);
1688 if (!mdev)
1689 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690
1691 sector = be64_to_cpu(p->sector);
1692
Philipp Reisner87eeee42011-01-19 14:16:30 +01001693 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001694 req = find_request(mdev, &mdev->read_requests, p->block_id, sector, false, __func__);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001695 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001696 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001697 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698
Bart Van Assche24c48302011-05-21 18:32:29 +02001699 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700 * special casing it there for the various failure cases.
1701 * still no race with drbd_fail_pending_reads */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001702 err = recv_dless_read(mdev, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001703 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001704 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001705 /* else: nothing. handled from drbd_disconnect...
1706 * I don't think we may complete this just yet
1707 * in case we are "on-disconnect: freeze" */
1708
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001709 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001710}
1711
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001712static int receive_RSDataReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001714 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001716 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001717 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001718
1719 mdev = vnr_to_mdev(tconn, pi->vnr);
1720 if (!mdev)
1721 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722
1723 sector = be64_to_cpu(p->sector);
1724 D_ASSERT(p->block_id == ID_SYNCER);
1725
1726 if (get_ldev(mdev)) {
1727 /* data is submitted to disk within recv_resync_read.
1728 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001729 * or in drbd_peer_request_endio. */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001730 err = recv_resync_read(mdev, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731 } else {
1732 if (__ratelimit(&drbd_ratelimit_state))
1733 dev_err(DEV, "Can not write resync data to local disk.\n");
1734
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001735 err = drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001737 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001738 }
1739
Andreas Gruenbachere2857212011-03-25 00:57:38 +01001740 atomic_add(pi->size >> 9, &mdev->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001741
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001742 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743}
1744
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001745static void restart_conflicting_writes(struct drbd_conf *mdev,
1746 sector_t sector, int size)
1747{
1748 struct drbd_interval *i;
1749 struct drbd_request *req;
1750
1751 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1752 if (!i->local)
1753 continue;
1754 req = container_of(i, struct drbd_request, i);
1755 if (req->rq_state & RQ_LOCAL_PENDING ||
1756 !(req->rq_state & RQ_POSTPONED))
1757 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001758 /* as it is RQ_POSTPONED, this will cause it to
1759 * be queued on the retry workqueue. */
1760 __req_mod(req, DISCARD_WRITE, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001761 }
1762}
1763
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001764/*
1765 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001767static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001768{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001769 struct drbd_peer_request *peer_req =
1770 container_of(w, struct drbd_peer_request, w);
Philipp Reisner00d56942011-02-09 18:09:48 +01001771 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001772 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001773 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774
Philipp Reisner303d1442011-04-13 16:24:47 -07001775 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001776 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001777 pcmd = (mdev->state.conn >= C_SYNC_SOURCE &&
1778 mdev->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001779 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001781 err = drbd_send_ack(mdev, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001783 drbd_set_in_sync(mdev, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 } else {
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001785 err = drbd_send_ack(mdev, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001786 /* we expect it to be marked out of sync anyways...
1787 * maybe assert this? */
1788 }
1789 dec_unacked(mdev);
1790 }
1791 /* we delete from the conflict detection hash _after_ we sent out the
1792 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001793 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01001794 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001795 D_ASSERT(!drbd_interval_empty(&peer_req->i));
1796 drbd_remove_epoch_entry_interval(mdev, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001797 if (peer_req->flags & EE_RESTART_REQUESTS)
1798 restart_conflicting_writes(mdev, sector, peer_req->i.size);
Philipp Reisner87eeee42011-01-19 14:16:30 +01001799 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001800 } else
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001801 D_ASSERT(drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802
Philipp Reisner1e9dd292011-11-10 15:14:53 +01001803 drbd_may_finish_epoch(mdev->tconn, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001805 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806}
1807
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001808static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001809{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001810 struct drbd_conf *mdev = w->mdev;
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001811 struct drbd_peer_request *peer_req =
1812 container_of(w, struct drbd_peer_request, w);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001813 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001814
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001815 err = drbd_send_ack(mdev, ack, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001816 dec_unacked(mdev);
1817
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001818 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001819}
1820
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001821static int e_send_discard_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001822{
1823 return e_send_ack(w, P_DISCARD_WRITE);
1824}
1825
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001826static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001827{
1828 struct drbd_tconn *tconn = w->mdev->tconn;
1829
1830 return e_send_ack(w, tconn->agreed_pro_version >= 100 ?
1831 P_RETRY_WRITE : P_DISCARD_WRITE);
1832}
1833
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001834static bool seq_greater(u32 a, u32 b)
1835{
1836 /*
1837 * We assume 32-bit wrap-around here.
1838 * For 24-bit wrap-around, we would have to shift:
1839 * a <<= 8; b <<= 8;
1840 */
1841 return (s32)a - (s32)b > 0;
1842}
1843
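/*
 * Example: with a = 0x00000001 (just past the wrap) and b = 0xffffffff,
 * (s32)a - (s32)b == 1 - (-1) == 2 > 0, so seq_greater(a, b) still reports
 * the post-wrap value as the newer one.
 */
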
1844static u32 seq_max(u32 a, u32 b)
1845{
1846 return seq_greater(a, b) ? a : b;
1847}
1848
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001849static bool need_peer_seq(struct drbd_conf *mdev)
1850{
1851 struct drbd_tconn *tconn = mdev->tconn;
Philipp Reisner302bdea2011-04-21 11:36:49 +02001852 int tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001853
1854 /*
1855 * We only need to keep track of the last packet_seq number of our peer
1856 * if we are in dual-primary mode and we have the discard flag set; see
1857 * handle_write_conflicts().
1858 */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001859
1860 rcu_read_lock();
1861 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
1862 rcu_read_unlock();
1863
1864 return tp && test_bit(DISCARD_CONCURRENT, &tconn->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001865}
1866
Andreas Gruenbacher43ae0772011-02-03 18:42:08 +01001867static void update_peer_seq(struct drbd_conf *mdev, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001868{
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001869 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001870
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001871 if (need_peer_seq(mdev)) {
1872 spin_lock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001873 newest_peer_seq = seq_max(mdev->peer_seq, peer_seq);
1874 mdev->peer_seq = newest_peer_seq;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001875 spin_unlock(&mdev->peer_seq_lock);
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001876 /* wake up only if we actually changed mdev->peer_seq */
1877 if (peer_seq == newest_peer_seq)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001878 wake_up(&mdev->seq_wait);
1879 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001880}
1881
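/*
 * Note: s1/s2 are start sectors, l1/l2 are lengths in bytes (hence the ">> 9"
 * conversion to sectors).  Two extents do not overlap iff one of them ends at
 * or before the start of the other.
 */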
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001882static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1883{
1884 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1885}
1886
1887/* maybe change sync_ee into interval trees as well? */
1888static bool overlaping_resync_write(struct drbd_conf *mdev, struct drbd_peer_request *peer_req)
1889{
1890 struct drbd_peer_request *rs_req;
 1891	bool rv = false;
1892
1893 spin_lock_irq(&mdev->tconn->req_lock);
1894 list_for_each_entry(rs_req, &mdev->sync_ee, w.list) {
1895 if (overlaps(peer_req->i.sector, peer_req->i.size,
1896 rs_req->i.sector, rs_req->i.size)) {
 1897			rv = true;
1898 break;
1899 }
1900 }
1901 spin_unlock_irq(&mdev->tconn->req_lock);
1902
1903 if (rv)
 1904		dev_warn(DEV, "WARN: Avoiding concurrent data/resync write to a single sector.\n");
1905
1906 return rv;
1907}
1908
Philipp Reisnerb411b362009-09-25 16:07:19 -07001909/* Called from receive_Data.
1910 * Synchronize packets on sock with packets on msock.
1911 *
1912 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1913 * packet traveling on msock, they are still processed in the order they have
1914 * been sent.
1915 *
1916 * Note: we don't care for Ack packets overtaking P_DATA packets.
1917 *
1918 * In case packet_seq is larger than mdev->peer_seq number, there are
1919 * outstanding packets on the msock. We wait for them to arrive.
1920 * In case we are the logically next packet, we update mdev->peer_seq
1921 * ourselves. Correctly handles 32bit wrap around.
1922 *
1923 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1924 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1925 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1926 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1927 *
1928 * returns 0 if we may process the packet,
1929 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001930static int wait_for_and_update_peer_seq(struct drbd_conf *mdev, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001931{
1932 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001933 long timeout;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001934 int ret;
1935
1936 if (!need_peer_seq(mdev))
1937 return 0;
1938
Philipp Reisnerb411b362009-09-25 16:07:19 -07001939 spin_lock(&mdev->peer_seq_lock);
1940 for (;;) {
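		/*
		 * We may proceed as soon as peer_seq <= mdev->peer_seq + 1 (modulo
		 * 32-bit wrap): seq_greater(peer_seq - 1, mdev->peer_seq) is true
		 * only while at least one earlier sequence number has not been seen
		 * yet, i.e. while packets are still outstanding on the msock.
		 */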
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001941 if (!seq_greater(peer_seq - 1, mdev->peer_seq)) {
1942 mdev->peer_seq = seq_max(mdev->peer_seq, peer_seq);
1943 ret = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001945 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001946 if (signal_pending(current)) {
1947 ret = -ERESTARTSYS;
1948 break;
1949 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001950 prepare_to_wait(&mdev->seq_wait, &wait, TASK_INTERRUPTIBLE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001951 spin_unlock(&mdev->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02001952 rcu_read_lock();
1953 timeout = rcu_dereference(mdev->tconn->net_conf)->ping_timeo*HZ/10;
1954 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001955 timeout = schedule_timeout(timeout);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001956 spin_lock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001957 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001958 ret = -ETIMEDOUT;
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01001959 dev_err(DEV, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960 break;
1961 }
1962 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963 spin_unlock(&mdev->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001964 finish_wait(&mdev->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001965 return ret;
1966}
1967
Lars Ellenberg688593c2010-11-17 22:25:03 +01001968/* see also bio_flags_to_wire()
1969 * DRBD_REQ_*, because we need to semantically map the flags to data packet
1970 * flags and back. We may replicate to other kernel versions. */
1971static unsigned long wire_flags_to_bio(struct drbd_conf *mdev, u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001972{
Lars Ellenberg688593c2010-11-17 22:25:03 +01001973 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
1974 (dpf & DP_FUA ? REQ_FUA : 0) |
1975 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
1976 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02001977}
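
/*
 * Example: a peer write received with DP_FUA | DP_FLUSH set is resubmitted
 * locally with REQ_FUA | REQ_FLUSH, preserving the durability semantics the
 * sending side asked for.
 */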
1978
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001979static void fail_postponed_requests(struct drbd_conf *mdev, sector_t sector,
1980 unsigned int size)
1981{
1982 struct drbd_interval *i;
1983
1984 repeat:
1985 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
1986 struct drbd_request *req;
1987 struct bio_and_error m;
1988
1989 if (!i->local)
1990 continue;
1991 req = container_of(i, struct drbd_request, i);
1992 if (!(req->rq_state & RQ_POSTPONED))
1993 continue;
1994 req->rq_state &= ~RQ_POSTPONED;
1995 __req_mod(req, NEG_ACKED, &m);
1996 spin_unlock_irq(&mdev->tconn->req_lock);
1997 if (m.bio)
1998 complete_master_bio(mdev, &m);
1999 spin_lock_irq(&mdev->tconn->req_lock);
2000 goto repeat;
2001 }
2002}
2003
2004static int handle_write_conflicts(struct drbd_conf *mdev,
2005 struct drbd_peer_request *peer_req)
2006{
2007 struct drbd_tconn *tconn = mdev->tconn;
2008 bool resolve_conflicts = test_bit(DISCARD_CONCURRENT, &tconn->flags);
2009 sector_t sector = peer_req->i.sector;
2010 const unsigned int size = peer_req->i.size;
2011 struct drbd_interval *i;
2012 bool equal;
2013 int err;
2014
2015 /*
2016 * Inserting the peer request into the write_requests tree will prevent
2017 * new conflicting local requests from being added.
2018 */
2019 drbd_insert_interval(&mdev->write_requests, &peer_req->i);
2020
2021 repeat:
2022 drbd_for_each_overlap(i, &mdev->write_requests, sector, size) {
2023 if (i == &peer_req->i)
2024 continue;
2025
2026 if (!i->local) {
2027 /*
2028 * Our peer has sent a conflicting remote request; this
2029 * should not happen in a two-node setup. Wait for the
2030 * earlier peer request to complete.
2031 */
2032 err = drbd_wait_misc(mdev, i);
2033 if (err)
2034 goto out;
2035 goto repeat;
2036 }
2037
2038 equal = i->sector == sector && i->size == size;
2039 if (resolve_conflicts) {
2040 /*
2041 * If the peer request is fully contained within the
2042 * overlapping request, it can be discarded; otherwise,
2043 * it will be retried once all overlapping requests
2044 * have completed.
2045 */
2046 bool discard = i->sector <= sector && i->sector +
2047 (i->size >> 9) >= sector + (size >> 9);
2048
2049 if (!equal)
2050 dev_alert(DEV, "Concurrent writes detected: "
2051 "local=%llus +%u, remote=%llus +%u, "
2052 "assuming %s came first\n",
2053 (unsigned long long)i->sector, i->size,
2054 (unsigned long long)sector, size,
2055 discard ? "local" : "remote");
2056
2057 inc_unacked(mdev);
2058 peer_req->w.cb = discard ? e_send_discard_write :
2059 e_send_retry_write;
2060 list_add_tail(&peer_req->w.list, &mdev->done_ee);
2061 wake_asender(mdev->tconn);
2062
2063 err = -ENOENT;
2064 goto out;
2065 } else {
2066 struct drbd_request *req =
2067 container_of(i, struct drbd_request, i);
2068
2069 if (!equal)
2070 dev_alert(DEV, "Concurrent writes detected: "
2071 "local=%llus +%u, remote=%llus +%u\n",
2072 (unsigned long long)i->sector, i->size,
2073 (unsigned long long)sector, size);
2074
2075 if (req->rq_state & RQ_LOCAL_PENDING ||
2076 !(req->rq_state & RQ_POSTPONED)) {
2077 /*
2078 * Wait for the node with the discard flag to
2079 * decide if this request will be discarded or
2080 * retried. Requests that are discarded will
2081 * disappear from the write_requests tree.
2082 *
2083 * In addition, wait for the conflicting
2084 * request to finish locally before submitting
2085 * the conflicting peer request.
2086 */
2087 err = drbd_wait_misc(mdev, &req->i);
2088 if (err) {
2089 _conn_request_state(mdev->tconn,
2090 NS(conn, C_TIMEOUT),
2091 CS_HARD);
2092 fail_postponed_requests(mdev, sector, size);
2093 goto out;
2094 }
2095 goto repeat;
2096 }
2097 /*
2098 * Remember to restart the conflicting requests after
2099 * the new peer request has completed.
2100 */
2101 peer_req->flags |= EE_RESTART_REQUESTS;
2102 }
2103 }
2104 err = 0;
2105
2106 out:
2107 if (err)
2108 drbd_remove_epoch_entry_interval(mdev, peer_req);
2109 return err;
2110}
2111
Philipp Reisnerb411b362009-09-25 16:07:19 -07002112/* mirrored write */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002113static int receive_Data(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002114{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002115 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002116 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002117 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002118 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002119 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002120 int rw = WRITE;
2121 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002122 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002123
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002124 mdev = vnr_to_mdev(tconn, pi->vnr);
2125 if (!mdev)
2126 return -EIO;
2127
Philipp Reisnerb411b362009-09-25 16:07:19 -07002128 if (!get_ldev(mdev)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002129 int err2;
2130
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002131 err = wait_for_and_update_peer_seq(mdev, peer_seq);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002132 drbd_send_ack_dp(mdev, P_NEG_ACK, p, pi->size);
Philipp Reisner12038a32011-11-09 19:18:00 +01002133 atomic_inc(&tconn->current_epoch->epoch_size);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002134 err2 = drbd_drain_block(mdev, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002135 if (!err)
2136 err = err2;
2137 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138 }
2139
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002140 /*
2141 * Corresponding put_ldev done either below (on various errors), or in
2142 * drbd_peer_request_endio, if we successfully submit the data at the
2143 * end of this function.
2144 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145
2146 sector = be64_to_cpu(p->sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002147 peer_req = read_in_block(mdev, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002148 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002149 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002150 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002151 }
2152
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002153 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002154
Lars Ellenberg688593c2010-11-17 22:25:03 +01002155 dp_flags = be32_to_cpu(p->dp_flags);
2156 rw |= wire_flags_to_bio(mdev, dp_flags);
2157
2158 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002159 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002160
Philipp Reisner12038a32011-11-09 19:18:00 +01002161 spin_lock(&tconn->epoch_lock);
2162 peer_req->epoch = tconn->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002163 atomic_inc(&peer_req->epoch->epoch_size);
2164 atomic_inc(&peer_req->epoch->active);
Philipp Reisner12038a32011-11-09 19:18:00 +01002165 spin_unlock(&tconn->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002166
Philipp Reisner302bdea2011-04-21 11:36:49 +02002167 rcu_read_lock();
2168 tp = rcu_dereference(mdev->tconn->net_conf)->two_primaries;
2169 rcu_read_unlock();
2170 if (tp) {
2171 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002172 err = wait_for_and_update_peer_seq(mdev, peer_seq);
2173 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174 goto out_interrupted;
Philipp Reisner87eeee42011-01-19 14:16:30 +01002175 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002176 err = handle_write_conflicts(mdev, peer_req);
2177 if (err) {
2178 spin_unlock_irq(&mdev->tconn->req_lock);
2179 if (err == -ENOENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002180 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002181 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002182 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002183 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002184 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002185 } else
2186 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002187 list_add(&peer_req->w.list, &mdev->active_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002188 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002190 if (mdev->state.conn == C_SYNC_TARGET)
2191 wait_event(mdev->ee_wait, !overlaping_resync_write(mdev, peer_req));
2192
Philipp Reisner303d1442011-04-13 16:24:47 -07002193 if (mdev->tconn->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002194 rcu_read_lock();
2195 switch (rcu_dereference(mdev->tconn->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002196 case DRBD_PROT_C:
2197 dp_flags |= DP_SEND_WRITE_ACK;
2198 break;
2199 case DRBD_PROT_B:
2200 dp_flags |= DP_SEND_RECEIVE_ACK;
2201 break;
2202 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002203 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002204 }
2205
2206 if (dp_flags & DP_SEND_WRITE_ACK) {
2207 peer_req->flags |= EE_SEND_WRITE_ACK;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002208 inc_unacked(mdev);
2209 /* corresponding dec_unacked() in e_end_block()
2210 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002211 }
2212
2213 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002214 /* I really don't like it that the receiver thread
2215 * sends on the msock, but anyways */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002216 drbd_send_ack(mdev, P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002217 }
2218
Lars Ellenberg6719fb02010-10-18 23:04:07 +02002219 if (mdev->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220		/* In case we have the only disk of the cluster, mark the block
		 * out of sync and record the write in the activity log. */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002221 drbd_set_out_of_sync(mdev, peer_req->i.sector, peer_req->i.size);
2222 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2223 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg181286a2011-03-31 15:18:56 +02002224 drbd_al_begin_io(mdev, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225 }
2226
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002227 err = drbd_submit_peer_request(mdev, peer_req, rw, DRBD_FAULT_DT_WR);
2228 if (!err)
2229 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002231 /* don't care for the reason here */
2232 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002233 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002234 list_del(&peer_req->w.list);
2235 drbd_remove_epoch_entry_interval(mdev, peer_req);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002236 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002237 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Lars Ellenberg181286a2011-03-31 15:18:56 +02002238 drbd_al_complete_io(mdev, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002239
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240out_interrupted:
Philipp Reisner1e9dd292011-11-10 15:14:53 +01002241 drbd_may_finish_epoch(tconn, peer_req->epoch, EV_PUT + EV_CLEANUP);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002242 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002243 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002244 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002245}
2246
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002247/* We may throttle resync, if the lower device seems to be busy,
2248 * and current sync rate is above c_min_rate.
2249 *
2250 * To decide whether or not the lower device is busy, we use a scheme similar
2251 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2252 * (more than 64 sectors) of activity we cannot account for with our own resync
2253 * activity, it obviously is "busy".
2254 *
2255 * The current sync rate used here uses only the most recent two step marks,
2256 * to have a short time average so we can react faster.
2257 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002258int drbd_rs_should_slow_down(struct drbd_conf *mdev, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002259{
2260 struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk;
2261 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002262 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002263 int curr_events;
2264 int throttle = 0;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002265 unsigned int c_min_rate;
2266
2267 rcu_read_lock();
2268 c_min_rate = rcu_dereference(mdev->ldev->disk_conf)->c_min_rate;
2269 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002270
2271 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002272 if (c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002273 return 0;
2274
Philipp Reisnere3555d82010-11-07 15:56:29 +01002275 spin_lock_irq(&mdev->al_lock);
2276 tmp = lc_find(mdev->resync, BM_SECT_TO_EXT(sector));
2277 if (tmp) {
2278 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2279 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
2280 spin_unlock_irq(&mdev->al_lock);
2281 return 0;
2282 }
2283 /* Do not slow down if app IO is already waiting for this extent */
2284 }
2285 spin_unlock_irq(&mdev->al_lock);
2286
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002287 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2288 (int)part_stat_read(&disk->part0, sectors[1]) -
2289 atomic_read(&mdev->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002290
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002291 if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) {
2292 unsigned long rs_left;
2293 int i;
2294
2295 mdev->rs_last_events = curr_events;
2296
2297 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2298 * approx. */
Lars Ellenberg2649f082010-11-05 10:05:47 +01002299 i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
2300
2301 if (mdev->state.conn == C_VERIFY_S || mdev->state.conn == C_VERIFY_T)
2302 rs_left = mdev->ov_left;
2303 else
2304 rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002305
2306 dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ;
2307 if (!dt)
2308 dt++;
2309 db = mdev->rs_mark_left[i] - rs_left;
2310 dbdt = Bit2KB(db/dt);
2311
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002312 if (dbdt > c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002313 throttle = 1;
2314 }
2315 return throttle;
2316}
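/*
 * Editorial worked example for drbd_rs_should_slow_down() above; not part of
 * the original source, and the configuration values are made up. Assume
 * c_min_rate is set to 250 kB/s and the backing device shows more than 64
 * sectors of I/O we did not issue ourselves. With the usual ~3 second sync
 * mark step, the previous mark is roughly dt = 6 seconds old; if db = 3000
 * bitmap bits were cleared since then, then at 4 KiB per bitmap bit
 * dbdt = Bit2KB(3000 / 6) = 2000 kB/s. Since 2000 > 250, the function returns
 * 1 and the caller in receive_DataRequest() sleeps for HZ/10 before
 * proceeding with the resync request.
 */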
2317
2318
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002319static int receive_DataRequest(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002320{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002321 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002322 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002323 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002324 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002325 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002326 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002327 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002328 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002329
2330 mdev = vnr_to_mdev(tconn, pi->vnr);
2331 if (!mdev)
2332 return -EIO;
2333 capacity = drbd_get_capacity(mdev->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002334
2335 sector = be64_to_cpu(p->sector);
2336 size = be32_to_cpu(p->blksize);
2337
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002338 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002339 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2340 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002341 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002342 }
2343 if (sector + (size>>9) > capacity) {
2344 dev_err(DEV, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
2345 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002346 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002347 }
2348
2349 if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002350 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002351 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002352 case P_DATA_REQUEST:
2353 drbd_send_ack_rp(mdev, P_NEG_DREPLY, p);
2354 break;
2355 case P_RS_DATA_REQUEST:
2356 case P_CSUM_RS_REQUEST:
2357 case P_OV_REQUEST:
2358 drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p);
2359 break;
2360 case P_OV_REPLY:
2361 verb = 0;
2362 dec_rs_pending(mdev);
2363 drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC);
2364 break;
2365 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002366 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002367 }
2368 if (verb && __ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002369 dev_err(DEV, "Can not satisfy peer's read request, "
2370 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002371
Lars Ellenberga821cc42010-09-06 12:31:37 +02002372		/* drain the payload, if any */
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002373 return drbd_drain_block(mdev, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002374 }
2375
2376 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2377 * "criss-cross" setup, that might cause write-out on some other DRBD,
2378 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher0db55362011-04-06 16:09:15 +02002379 peer_req = drbd_alloc_peer_req(mdev, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002380 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002381 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002382 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002383 }
2384
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002385 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002386 case P_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002387 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002388 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002389 /* application IO, don't drbd_rs_begin_io */
2390 goto submit;
2391
Philipp Reisnerb411b362009-09-25 16:07:19 -07002392 case P_RS_DATA_REQUEST:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002393 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002395 /* used in the sector offset progress display */
2396 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002397 break;
2398
2399 case P_OV_REPLY:
2400 case P_CSUM_RS_REQUEST:
2401 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002402 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002403 if (!di)
2404 goto out_free_e;
2405
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002406 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002407 di->digest = (((char *)di)+sizeof(struct digest_info));
2408
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002409 peer_req->digest = di;
2410 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002411
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002412 if (drbd_recv_all(mdev->tconn, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002413 goto out_free_e;
2414
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002415 if (pi->cmd == P_CSUM_RS_REQUEST) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002416 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002417 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002418 /* used in the sector offset progress display */
2419 mdev->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002420 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002421 /* track progress, we may need to throttle */
2422 atomic_add(size >> 9, &mdev->rs_sect_in);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002423 peer_req->w.cb = w_e_end_ov_reply;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424 dec_rs_pending(mdev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002425 /* drbd_rs_begin_io done when we sent this request,
2426 * but accounting still needs to be done. */
2427 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002428 }
2429 break;
2430
2431 case P_OV_REQUEST:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002432 if (mdev->ov_start_sector == ~(sector_t)0 &&
Philipp Reisner31890f42011-01-19 14:12:51 +01002433 mdev->tconn->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002434 unsigned long now = jiffies;
2435 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002436 mdev->ov_start_sector = sector;
2437 mdev->ov_position = sector;
Lars Ellenberg30b743a2010-11-05 09:39:06 +01002438 mdev->ov_left = drbd_bm_bits(mdev) - BM_SECT_TO_BIT(sector);
2439 mdev->rs_total = mdev->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002440 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
2441 mdev->rs_mark_left[i] = mdev->ov_left;
2442 mdev->rs_mark_time[i] = now;
2443 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002444 dev_info(DEV, "Online Verify start sector: %llu\n",
2445 (unsigned long long)sector);
2446 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002447 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002448 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002449 break;
2450
Philipp Reisnerb411b362009-09-25 16:07:19 -07002451 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002452 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002453 }
2454
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002455 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2456 * wrt the receiver, but it is not as straightforward as it may seem.
2457 * Various places in the resync start and stop logic assume resync
2458 * requests are processed in order, requeuing this on the worker thread
2459 * introduces a bunch of new code for synchronization between threads.
2460 *
2461 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2462 * "forever", throttling after drbd_rs_begin_io will lock that extent
2463 * for application writes for the same time. For now, just throttle
2464 * here, where the rest of the code expects the receiver to sleep for
2465 * a while, anyways.
2466 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002468 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2469 * this defers syncer requests for some time, before letting at least
2470 * one request through. The resync controller on the receiving side
2471 * will adapt to the incoming rate accordingly.
2472 *
2473 * We cannot throttle here if remote is Primary/SyncTarget:
2474 * we would also throttle its application reads.
2475 * In that case, throttling is done on the SyncTarget only.
2476 */
Philipp Reisnere3555d82010-11-07 15:56:29 +01002477 if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev, sector))
2478 schedule_timeout_uninterruptible(HZ/10);
2479 if (drbd_rs_begin_io(mdev, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002480 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002481
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002482submit_for_resync:
2483 atomic_add(size >> 9, &mdev->rs_sect_ev);
2484
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002485submit:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002486 inc_unacked(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002487 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002488 list_add_tail(&peer_req->w.list, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002489 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002490
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01002491 if (drbd_submit_peer_request(mdev, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002492 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002493
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002494 /* don't care for the reason here */
2495 dev_err(DEV, "submit failed, triggering re-connect\n");
Philipp Reisner87eeee42011-01-19 14:16:30 +01002496 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002497 list_del(&peer_req->w.list);
Philipp Reisner87eeee42011-01-19 14:16:30 +01002498 spin_unlock_irq(&mdev->tconn->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002499 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2500
Philipp Reisnerb411b362009-09-25 16:07:19 -07002501out_free_e:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002502 put_ldev(mdev);
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +02002503 drbd_free_peer_req(mdev, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002504 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002505}
2506
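/*
 * Editorial note, not part of the original source: the three after-split-brain
 * recovery helpers below share one sign convention -- a negative return value
 * means this node's data is discarded (it becomes sync target), a positive
 * value means the peer's data is discarded (this node becomes sync source),
 * and -100 means no automatic decision could be reached.
 */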
2507static int drbd_asb_recover_0p(struct drbd_conf *mdev) __must_hold(local)
2508{
2509 int self, peer, rv = -100;
2510 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002511 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002512
2513 self = mdev->ldev->md.uuid[UI_BITMAP] & 1;
2514 peer = mdev->p_uuid[UI_BITMAP] & 1;
2515
2516 ch_peer = mdev->p_uuid[UI_SIZE];
2517 ch_self = mdev->comm_bm_set;
2518
Philipp Reisner44ed1672011-04-19 17:10:19 +02002519 rcu_read_lock();
2520 after_sb_0p = rcu_dereference(mdev->tconn->net_conf)->after_sb_0p;
2521 rcu_read_unlock();
2522 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002523 case ASB_CONSENSUS:
2524 case ASB_DISCARD_SECONDARY:
2525 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002526 case ASB_VIOLENTLY:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002527 dev_err(DEV, "Configuration error.\n");
2528 break;
2529 case ASB_DISCONNECT:
2530 break;
2531 case ASB_DISCARD_YOUNGER_PRI:
2532 if (self == 0 && peer == 1) {
2533 rv = -1;
2534 break;
2535 }
2536 if (self == 1 && peer == 0) {
2537 rv = 1;
2538 break;
2539 }
2540 /* Else fall through to one of the other strategies... */
2541 case ASB_DISCARD_OLDER_PRI:
2542 if (self == 0 && peer == 1) {
2543 rv = 1;
2544 break;
2545 }
2546 if (self == 1 && peer == 0) {
2547 rv = -1;
2548 break;
2549 }
2550 /* Else fall through to one of the other strategies... */
Lars Ellenbergad19bf62009-10-14 09:36:49 +02002551 dev_warn(DEV, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002552 "Using discard-least-changes instead\n");
2553 case ASB_DISCARD_ZERO_CHG:
2554 if (ch_peer == 0 && ch_self == 0) {
Philipp Reisner25703f82011-02-07 14:35:25 +01002555 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002556 ? -1 : 1;
2557 break;
2558 } else {
2559 if (ch_peer == 0) { rv = 1; break; }
2560 if (ch_self == 0) { rv = -1; break; }
2561 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002562 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563 break;
2564 case ASB_DISCARD_LEAST_CHG:
2565 if (ch_self < ch_peer)
2566 rv = -1;
2567 else if (ch_self > ch_peer)
2568 rv = 1;
2569 else /* ( ch_self == ch_peer ) */
2570 /* Well, then use something else. */
Philipp Reisner25703f82011-02-07 14:35:25 +01002571 rv = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002572 ? -1 : 1;
2573 break;
2574 case ASB_DISCARD_LOCAL:
2575 rv = -1;
2576 break;
2577 case ASB_DISCARD_REMOTE:
2578 rv = 1;
2579 }
2580
2581 return rv;
2582}
2583
2584static int drbd_asb_recover_1p(struct drbd_conf *mdev) __must_hold(local)
2585{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002586 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002587 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002588
Philipp Reisner44ed1672011-04-19 17:10:19 +02002589 rcu_read_lock();
2590 after_sb_1p = rcu_dereference(mdev->tconn->net_conf)->after_sb_1p;
2591 rcu_read_unlock();
2592 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002593 case ASB_DISCARD_YOUNGER_PRI:
2594 case ASB_DISCARD_OLDER_PRI:
2595 case ASB_DISCARD_LEAST_CHG:
2596 case ASB_DISCARD_LOCAL:
2597 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002598 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002599 dev_err(DEV, "Configuration error.\n");
2600 break;
2601 case ASB_DISCONNECT:
2602 break;
2603 case ASB_CONSENSUS:
2604 hg = drbd_asb_recover_0p(mdev);
2605 if (hg == -1 && mdev->state.role == R_SECONDARY)
2606 rv = hg;
2607 if (hg == 1 && mdev->state.role == R_PRIMARY)
2608 rv = hg;
2609 break;
2610 case ASB_VIOLENTLY:
2611 rv = drbd_asb_recover_0p(mdev);
2612 break;
2613 case ASB_DISCARD_SECONDARY:
2614 return mdev->state.role == R_PRIMARY ? 1 : -1;
2615 case ASB_CALL_HELPER:
2616 hg = drbd_asb_recover_0p(mdev);
2617 if (hg == -1 && mdev->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002618 enum drbd_state_rv rv2;
2619
2620 drbd_set_role(mdev, R_SECONDARY, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002621 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2622 * we might be here in C_WF_REPORT_PARAMS which is transient.
2623 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002624 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2625 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002626 drbd_khelper(mdev, "pri-lost-after-sb");
2627 } else {
2628 dev_warn(DEV, "Successfully gave up primary role.\n");
2629 rv = hg;
2630 }
2631 } else
2632 rv = hg;
2633 }
2634
2635 return rv;
2636}
2637
2638static int drbd_asb_recover_2p(struct drbd_conf *mdev) __must_hold(local)
2639{
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002640 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002641 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002642
Philipp Reisner44ed1672011-04-19 17:10:19 +02002643 rcu_read_lock();
2644 after_sb_2p = rcu_dereference(mdev->tconn->net_conf)->after_sb_2p;
2645 rcu_read_unlock();
2646 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002647 case ASB_DISCARD_YOUNGER_PRI:
2648 case ASB_DISCARD_OLDER_PRI:
2649 case ASB_DISCARD_LEAST_CHG:
2650 case ASB_DISCARD_LOCAL:
2651 case ASB_DISCARD_REMOTE:
2652 case ASB_CONSENSUS:
2653 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002654 case ASB_DISCARD_ZERO_CHG:
Philipp Reisnerb411b362009-09-25 16:07:19 -07002655 dev_err(DEV, "Configuration error.\n");
2656 break;
2657 case ASB_VIOLENTLY:
2658 rv = drbd_asb_recover_0p(mdev);
2659 break;
2660 case ASB_DISCONNECT:
2661 break;
2662 case ASB_CALL_HELPER:
2663 hg = drbd_asb_recover_0p(mdev);
2664 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002665 enum drbd_state_rv rv2;
2666
Philipp Reisnerb411b362009-09-25 16:07:19 -07002667 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2668 * we might be here in C_WF_REPORT_PARAMS which is transient.
2669 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002670 rv2 = drbd_change_state(mdev, CS_VERBOSE, NS(role, R_SECONDARY));
2671 if (rv2 != SS_SUCCESS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002672 drbd_khelper(mdev, "pri-lost-after-sb");
2673 } else {
2674 dev_warn(DEV, "Successfully gave up primary role.\n");
2675 rv = hg;
2676 }
2677 } else
2678 rv = hg;
2679 }
2680
2681 return rv;
2682}
2683
2684static void drbd_uuid_dump(struct drbd_conf *mdev, char *text, u64 *uuid,
2685 u64 bits, u64 flags)
2686{
2687 if (!uuid) {
2688 dev_info(DEV, "%s uuid info vanished while I was looking!\n", text);
2689 return;
2690 }
2691 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
2692 text,
2693 (unsigned long long)uuid[UI_CURRENT],
2694 (unsigned long long)uuid[UI_BITMAP],
2695 (unsigned long long)uuid[UI_HISTORY_START],
2696 (unsigned long long)uuid[UI_HISTORY_END],
2697 (unsigned long long)bits,
2698 (unsigned long long)flags);
2699}
2700
2701/*
2702 100 after split brain try auto recover
2703 2 C_SYNC_SOURCE set BitMap
2704 1 C_SYNC_SOURCE use BitMap
2705 0 no Sync
2706 -1 C_SYNC_TARGET use BitMap
2707 -2 C_SYNC_TARGET set BitMap
2708 -100 after split brain, disconnect
2709-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002710-1091 requires proto 91
2711-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002712 */
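/*
 * Editorial note, not part of the original source: the -1091 and -1096 values
 * are decoded again in drbd_sync_handshake(), which reports the required
 * protocol version to the log as -hg - 1000.
 */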
2713static int drbd_uuid_compare(struct drbd_conf *mdev, int *rule_nr) __must_hold(local)
2714{
2715 u64 self, peer;
2716 int i, j;
2717
2718 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2719 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2720
2721 *rule_nr = 10;
2722 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2723 return 0;
2724
2725 *rule_nr = 20;
2726 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2727 peer != UUID_JUST_CREATED)
2728 return -2;
2729
2730 *rule_nr = 30;
2731 if (self != UUID_JUST_CREATED &&
2732 (peer == UUID_JUST_CREATED || peer == (u64)0))
2733 return 2;
2734
2735 if (self == peer) {
2736 int rct, dc; /* roles at crash time */
2737
2738 if (mdev->p_uuid[UI_BITMAP] == (u64)0 && mdev->ldev->md.uuid[UI_BITMAP] != (u64)0) {
2739
Philipp Reisner31890f42011-01-19 14:12:51 +01002740 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002741 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002742
2743 if ((mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2744 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
2745 dev_info(DEV, "was SyncSource, missed the resync finished event, corrected myself:\n");
2746 drbd_uuid_set_bm(mdev, 0UL);
2747
2748 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2749 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2750 *rule_nr = 34;
2751 } else {
2752 dev_info(DEV, "was SyncSource (peer failed to write sync_uuid)\n");
2753 *rule_nr = 36;
2754 }
2755
2756 return 1;
2757 }
2758
2759 if (mdev->ldev->md.uuid[UI_BITMAP] == (u64)0 && mdev->p_uuid[UI_BITMAP] != (u64)0) {
2760
Philipp Reisner31890f42011-01-19 14:12:51 +01002761 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002762 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763
2764 if ((mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (mdev->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2765 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
2766 dev_info(DEV, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
2767
2768 mdev->p_uuid[UI_HISTORY_START + 1] = mdev->p_uuid[UI_HISTORY_START];
2769 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_BITMAP];
2770 mdev->p_uuid[UI_BITMAP] = 0UL;
2771
2772 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2773 *rule_nr = 35;
2774 } else {
2775 dev_info(DEV, "was SyncTarget (failed to write sync_uuid)\n");
2776 *rule_nr = 37;
2777 }
2778
2779 return -1;
2780 }
2781
2782 /* Common power [off|failure] */
2783 rct = (test_bit(CRASHED_PRIMARY, &mdev->flags) ? 1 : 0) +
2784 (mdev->p_uuid[UI_FLAGS] & 2);
2785 /* lowest bit is set when we were primary,
2786 * next bit (weight 2) is set when peer was primary */
2787 *rule_nr = 40;
2788
2789 switch (rct) {
2790 case 0: /* !self_pri && !peer_pri */ return 0;
2791 case 1: /* self_pri && !peer_pri */ return 1;
2792 case 2: /* !self_pri && peer_pri */ return -1;
2793 case 3: /* self_pri && peer_pri */
Philipp Reisner25703f82011-02-07 14:35:25 +01002794 dc = test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795 return dc ? -1 : 1;
2796 }
2797 }
2798
2799 *rule_nr = 50;
2800 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2801 if (self == peer)
2802 return -1;
2803
2804 *rule_nr = 51;
2805 peer = mdev->p_uuid[UI_HISTORY_START] & ~((u64)1);
2806 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002807 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002808 (mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2809 (mdev->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2810 peer + UUID_NEW_BM_OFFSET == (mdev->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002811			/* The last P_SYNC_UUID did not get through. Undo the UUID
 2812			   modifications the peer made at its last start of resync as sync source. */
2813
Philipp Reisner31890f42011-01-19 14:12:51 +01002814 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002815 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002816
2817 mdev->p_uuid[UI_BITMAP] = mdev->p_uuid[UI_HISTORY_START];
2818 mdev->p_uuid[UI_HISTORY_START] = mdev->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002819
2820			dev_info(DEV, "Did not get last syncUUID packet, corrected:\n");
2821 drbd_uuid_dump(mdev, "peer", mdev->p_uuid, mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2822
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823 return -1;
2824 }
2825 }
2826
2827 *rule_nr = 60;
2828 self = mdev->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2829 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2830 peer = mdev->p_uuid[i] & ~((u64)1);
2831 if (self == peer)
2832 return -2;
2833 }
2834
2835 *rule_nr = 70;
2836 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2837 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
2838 if (self == peer)
2839 return 1;
2840
2841 *rule_nr = 71;
2842 self = mdev->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
2843 if (self == peer) {
Philipp Reisner31890f42011-01-19 14:12:51 +01002844 if (mdev->tconn->agreed_pro_version < 96 ?
Philipp Reisner4a23f262011-01-11 17:42:17 +01002845 (mdev->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2846 (mdev->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2847 self + UUID_NEW_BM_OFFSET == (mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848		/* The last P_SYNC_UUID did not get through. Undo the UUID
 2849		   modifications we made at our last start of resync as sync source. */
2850
Philipp Reisner31890f42011-01-19 14:12:51 +01002851 if (mdev->tconn->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002852 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002853
2854 _drbd_uuid_set(mdev, UI_BITMAP, mdev->ldev->md.uuid[UI_HISTORY_START]);
2855 _drbd_uuid_set(mdev, UI_HISTORY_START, mdev->ldev->md.uuid[UI_HISTORY_START + 1]);
2856
Philipp Reisner4a23f262011-01-11 17:42:17 +01002857 dev_info(DEV, "Last syncUUID did not get through, corrected:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002858 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid,
2859 mdev->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(mdev) : 0, 0);
2860
2861 return 1;
2862 }
2863 }
2864
2865
2866 *rule_nr = 80;
Philipp Reisnerd8c2a362009-11-18 15:52:51 +01002867 peer = mdev->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2869 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2870 if (self == peer)
2871 return 2;
2872 }
2873
2874 *rule_nr = 90;
2875 self = mdev->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2876 peer = mdev->p_uuid[UI_BITMAP] & ~((u64)1);
2877 if (self == peer && self != ((u64)0))
2878 return 100;
2879
2880 *rule_nr = 100;
2881 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
2882 self = mdev->ldev->md.uuid[i] & ~((u64)1);
2883 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
2884 peer = mdev->p_uuid[j] & ~((u64)1);
2885 if (self == peer)
2886 return -100;
2887 }
2888 }
2889
2890 return -1000;
2891}
2892
2893/* drbd_sync_handshake() returns the new conn state on success, or
2894   C_MASK on failure.
2895 */
2896static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_role peer_role,
2897 enum drbd_disk_state peer_disk) __must_hold(local)
2898{
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899 enum drbd_conns rv = C_MASK;
2900 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002901 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002902 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002903
2904 mydisk = mdev->state.disk;
2905 if (mydisk == D_NEGOTIATING)
2906 mydisk = mdev->new_state_tmp.disk;
2907
2908 dev_info(DEV, "drbd_sync_handshake:\n");
2909 drbd_uuid_dump(mdev, "self", mdev->ldev->md.uuid, mdev->comm_bm_set, 0);
2910 drbd_uuid_dump(mdev, "peer", mdev->p_uuid,
2911 mdev->p_uuid[UI_SIZE], mdev->p_uuid[UI_FLAGS]);
2912
2913 hg = drbd_uuid_compare(mdev, &rule_nr);
2914
2915 dev_info(DEV, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
2916
2917 if (hg == -1000) {
2918 dev_alert(DEV, "Unrelated data, aborting!\n");
2919 return C_MASK;
2920 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002921 if (hg < -1000) {
2922 dev_alert(DEV, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923 return C_MASK;
2924 }
2925
2926 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
2927 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
2928 int f = (hg == -100) || abs(hg) == 2;
2929 hg = mydisk > D_INCONSISTENT ? 1 : -1;
2930 if (f)
2931 hg = hg*2;
2932 dev_info(DEV, "Becoming sync %s due to disk states.\n",
2933 hg > 0 ? "source" : "target");
2934 }
2935
Adam Gandelman3a11a482010-04-08 16:48:23 -07002936 if (abs(hg) == 100)
2937 drbd_khelper(mdev, "initial-split-brain");
2938
Philipp Reisner44ed1672011-04-19 17:10:19 +02002939 rcu_read_lock();
2940 nc = rcu_dereference(mdev->tconn->net_conf);
2941
2942 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002943 int pcount = (mdev->state.role == R_PRIMARY)
2944 + (peer_role == R_PRIMARY);
2945 int forced = (hg == -100);
2946
2947 switch (pcount) {
2948 case 0:
2949 hg = drbd_asb_recover_0p(mdev);
2950 break;
2951 case 1:
2952 hg = drbd_asb_recover_1p(mdev);
2953 break;
2954 case 2:
2955 hg = drbd_asb_recover_2p(mdev);
2956 break;
2957 }
2958 if (abs(hg) < 100) {
2959 dev_warn(DEV, "Split-Brain detected, %d primaries, "
2960 "automatically solved. Sync from %s node\n",
2961 pcount, (hg < 0) ? "peer" : "this");
2962 if (forced) {
2963 dev_warn(DEV, "Doing a full sync, since"
2964 " UUIDs where ambiguous.\n");
2965 hg = hg*2;
2966 }
2967 }
2968 }
2969
2970 if (hg == -100) {
Philipp Reisner08b165b2011-09-05 16:22:33 +02002971 if (test_bit(DISCARD_MY_DATA, &mdev->flags) && !(mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002972 hg = -1;
Philipp Reisner08b165b2011-09-05 16:22:33 +02002973 if (!test_bit(DISCARD_MY_DATA, &mdev->flags) && (mdev->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002974 hg = 1;
2975
2976 if (abs(hg) < 100)
2977 dev_warn(DEV, "Split-Brain detected, manually solved. "
2978 "Sync from %s node\n",
2979 (hg < 0) ? "peer" : "this");
2980 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002981 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002982 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002983 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984
2985 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01002986 /* FIXME this log message is not correct if we end up here
2987 * after an attempted attach on a diskless node.
2988 * We just refuse to attach -- well, we drop the "connection"
2989 * to that disk, in a way... */
Adam Gandelman3a11a482010-04-08 16:48:23 -07002990 dev_alert(DEV, "Split-Brain detected but unresolved, dropping connection!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991 drbd_khelper(mdev, "split-brain");
2992 return C_MASK;
2993 }
2994
2995 if (hg > 0 && mydisk <= D_INCONSISTENT) {
2996 dev_err(DEV, "I shall become SyncSource, but I am inconsistent!\n");
2997 return C_MASK;
2998 }
2999
3000 if (hg < 0 && /* by intention we do not use mydisk here. */
3001 mdev->state.role == R_PRIMARY && mdev->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003002 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003003 case ASB_CALL_HELPER:
3004 drbd_khelper(mdev, "pri-lost");
3005 /* fall through */
3006 case ASB_DISCONNECT:
3007 dev_err(DEV, "I shall become SyncTarget, but I am primary!\n");
3008 return C_MASK;
3009 case ASB_VIOLENTLY:
3010 dev_warn(DEV, "Becoming SyncTarget, violating the stable-data"
3011 "assumption\n");
3012 }
3013 }
3014
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003015 if (tentative || test_bit(CONN_DRY_RUN, &mdev->tconn->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003016 if (hg == 0)
3017 dev_info(DEV, "dry-run connect: No resync, would become Connected immediately.\n");
3018 else
3019 dev_info(DEV, "dry-run connect: Would become %s, doing a %s resync.",
3020 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3021 abs(hg) >= 2 ? "full" : "bit-map based");
3022 return C_MASK;
3023 }
3024
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 if (abs(hg) >= 2) {
3026 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003027 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
3028 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003029 return C_MASK;
3030 }
3031
3032 if (hg > 0) { /* become sync source. */
3033 rv = C_WF_BITMAP_S;
3034 } else if (hg < 0) { /* become sync target */
3035 rv = C_WF_BITMAP_T;
3036 } else {
3037 rv = C_CONNECTED;
3038 if (drbd_bm_total_weight(mdev)) {
3039 dev_info(DEV, "No resync, but %lu bits in bitmap!\n",
3040 drbd_bm_total_weight(mdev));
3041 }
3042 }
3043
3044 return rv;
3045}
3046
Philipp Reisnerf179d762011-05-16 17:31:47 +02003047static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003048{
3049 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003050 if (peer == ASB_DISCARD_REMOTE)
3051 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003052
3053 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003054 if (peer == ASB_DISCARD_LOCAL)
3055 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003056
3057 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003058 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059}
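/*
 * Editorial note, not part of the original source: "local" and "remote" in
 * these settings are relative to the node that configured them, so a peer
 * that sends ASB_DISCARD_REMOTE is talking about *our* data; converting it
 * to ASB_DISCARD_LOCAL lets receive_protocol() compare both sides directly.
 */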
3060
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003061static int receive_protocol(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003063 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003064 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3065 int p_proto, p_discard_my_data, p_two_primaries, cf;
3066 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3067 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003068 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003069 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003070
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071 p_proto = be32_to_cpu(p->protocol);
3072 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3073 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3074 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003075 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003076 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003077 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003078
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003079 if (tconn->agreed_pro_version >= 87) {
3080 int err;
3081
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003082 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003083 return -EIO;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003084 err = drbd_recv_all(tconn, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003085 if (err)
3086 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003087 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
3088 }
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003089
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003090 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003091 clear_bit(CONN_DRY_RUN, &tconn->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003092
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003093 if (cf & CF_DRY_RUN)
3094 set_bit(CONN_DRY_RUN, &tconn->flags);
3095
3096 rcu_read_lock();
3097 nc = rcu_dereference(tconn->net_conf);
3098
3099 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003100 conn_err(tconn, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003101 goto disconnect_rcu_unlock;
3102 }
3103
3104 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003105 conn_err(tconn, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003106 goto disconnect_rcu_unlock;
3107 }
3108
3109 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003110 conn_err(tconn, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003111 goto disconnect_rcu_unlock;
3112 }
3113
3114 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003115 conn_err(tconn, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003116 goto disconnect_rcu_unlock;
3117 }
3118
3119 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003120 conn_err(tconn, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003121 goto disconnect_rcu_unlock;
3122 }
3123
3124 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003125 conn_err(tconn, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003126 goto disconnect_rcu_unlock;
3127 }
3128
3129 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacherd505d9b2011-07-15 17:19:18 +02003130 conn_err(tconn, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003131 goto disconnect_rcu_unlock;
3132 }
3133
3134 rcu_read_unlock();
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003135 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003136
3137 if (integrity_alg[0]) {
3138 int hash_size;
3139
3140 /*
3141 * We can only change the peer data integrity algorithm
3142 * here. Changing our own data integrity algorithm
3143 * requires that we send a P_PROTOCOL_UPDATE packet at
3144 * the same time; otherwise, the peer has no way to
3145 * tell between which packets the algorithm should
3146 * change.
3147 */
3148
3149 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3150 if (!peer_integrity_tfm) {
3151 conn_err(tconn, "peer data-integrity-alg %s not supported\n",
3152 integrity_alg);
3153 goto disconnect;
3154 }
3155
3156 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3157 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3158 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3159 if (!(int_dig_in && int_dig_vv)) {
3160 conn_err(tconn, "Allocation of buffers for data integrity checking failed\n");
3161 goto disconnect;
3162 }
3163 }
3164
3165 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3166 if (!new_net_conf) {
3167 conn_err(tconn, "Allocation of new net_conf failed\n");
3168 goto disconnect;
3169 }
3170
3171 mutex_lock(&tconn->data.mutex);
3172 mutex_lock(&tconn->conf_update);
3173 old_net_conf = tconn->net_conf;
3174 *new_net_conf = *old_net_conf;
3175
3176 new_net_conf->wire_protocol = p_proto;
3177 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3178 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3179 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3180 new_net_conf->two_primaries = p_two_primaries;
3181
3182 rcu_assign_pointer(tconn->net_conf, new_net_conf);
3183 mutex_unlock(&tconn->conf_update);
3184 mutex_unlock(&tconn->data.mutex);
3185
3186 crypto_free_hash(tconn->peer_integrity_tfm);
3187 kfree(tconn->int_dig_in);
3188 kfree(tconn->int_dig_vv);
3189 tconn->peer_integrity_tfm = peer_integrity_tfm;
3190 tconn->int_dig_in = int_dig_in;
3191 tconn->int_dig_vv = int_dig_vv;
3192
3193 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
3194 conn_info(tconn, "peer data-integrity-alg: %s\n",
3195 integrity_alg[0] ? integrity_alg : "(none)");
3196
3197 synchronize_rcu();
3198 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003199 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003200
Philipp Reisner44ed1672011-04-19 17:10:19 +02003201disconnect_rcu_unlock:
3202 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003203disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003204 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003205 kfree(int_dig_in);
3206 kfree(int_dig_vv);
Philipp Reisner72046242011-03-15 18:51:47 +01003207 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003208 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209}
3210
3211/* helper function
3212 * input: alg name, feature name
3213 * return: NULL (alg name was "")
3214 * ERR_PTR(error) if something goes wrong
3215 * or the crypto hash ptr, if it worked out ok. */
3216struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev,
3217 const char *alg, const char *name)
3218{
3219 struct crypto_hash *tfm;
3220
3221 if (!alg[0])
3222 return NULL;
3223
3224 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3225 if (IS_ERR(tfm)) {
3226 dev_err(DEV, "Can not allocate \"%s\" as %s (reason: %ld)\n",
3227 alg, name, PTR_ERR(tfm));
3228 return tfm;
3229 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230 return tfm;
3231}
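/*
 * Editorial usage note, not part of the original source: callers of
 * drbd_crypto_alloc_digest_safe() distinguish three outcomes -- NULL means
 * the algorithm name was empty (feature unused), IS_ERR() means allocation
 * failed and the error has already been logged here, and anything else is a
 * usable tfm that the caller owns and must release with crypto_free_hash().
 */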
3232
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003233static int ignore_remaining_packet(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003235 void *buffer = tconn->data.rbuf;
3236 int size = pi->size;
3237
3238 while (size) {
3239 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
3240 s = drbd_recv(tconn, buffer, s);
3241 if (s <= 0) {
3242 if (s < 0)
3243 return s;
3244 break;
3245 }
3246 size -= s;
3247 }
3248 if (size)
3249 return -EIO;
3250 return 0;
3251}
3252
3253/*
3254 * config_unknown_volume - device configuration command for unknown volume
3255 *
3256 * When a device is added to an existing connection, the node on which the
3257 * device is added first will send configuration commands to its peer but the
3258 * peer will not know about the device yet. It will warn and ignore these
3259 * commands. Once the device is added on the second node, the second node will
3260 * send the same device configuration commands, but in the other direction.
3261 *
3262 * (We can also end up here if drbd is misconfigured.)
3263 */
3264static int config_unknown_volume(struct drbd_tconn *tconn, struct packet_info *pi)
3265{
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003266 conn_warn(tconn, "%s packet received for volume %u, which is not configured locally\n",
3267 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003268 return ignore_remaining_packet(tconn, pi);
3269}
3270
3271static int receive_SyncParam(struct drbd_tconn *tconn, struct packet_info *pi)
3272{
3273 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003274 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003275 unsigned int header_size, data_size, exp_max_sz;
3276 struct crypto_hash *verify_tfm = NULL;
3277 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003278 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003279 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003280 const int apv = tconn->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003281 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003282 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003283 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003284
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003285 mdev = vnr_to_mdev(tconn, pi->vnr);
3286 if (!mdev)
3287 return config_unknown_volume(tconn, pi);
3288
Philipp Reisnerb411b362009-09-25 16:07:19 -07003289 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3290 : apv == 88 ? sizeof(struct p_rs_param)
3291 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003292 : apv <= 94 ? sizeof(struct p_rs_param_89)
3293 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003294
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003295 if (pi->size > exp_max_sz) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003297 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003298 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299 }
3300
3301 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003302 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003303 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003304 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003305 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003306 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003307 D_ASSERT(data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003308 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003309 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003310 data_size = pi->size - header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003311 D_ASSERT(data_size == 0);
3312 }
3313
3314 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003315 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003316 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3317
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003318 err = drbd_recv_all(mdev->tconn, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003319 if (err)
3320 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003321
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003322 mutex_lock(&mdev->tconn->conf_update);
3323 old_net_conf = mdev->tconn->net_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003324 if (get_ldev(mdev)) {
3325 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3326 if (!new_disk_conf) {
3327 put_ldev(mdev);
3328 mutex_unlock(&mdev->tconn->conf_update);
3329 dev_err(DEV, "Allocation of new disk_conf failed\n");
3330 return -ENOMEM;
3331 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003332
Philipp Reisner813472c2011-05-03 16:47:02 +02003333 old_disk_conf = mdev->ldev->disk_conf;
3334 *new_disk_conf = *old_disk_conf;
3335
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003336 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003337 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003338
Philipp Reisnerb411b362009-09-25 16:07:19 -07003339 if (apv >= 88) {
3340 if (apv == 88) {
3341 if (data_size > SHARED_SECRET_MAX) {
3342 dev_err(DEV, "verify-alg too long, "
3343 "peer wants %u, accepting only %u byte\n",
3344 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003345 err = -EIO;
3346 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003347 }
3348
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003349 err = drbd_recv_all(mdev->tconn, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003350 if (err)
3351 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003352 /* we expect NUL terminated string */
3353 /* but just in case someone tries to be evil */
3354 D_ASSERT(p->verify_alg[data_size-1] == 0);
3355 p->verify_alg[data_size-1] = 0;
3356
3357 } else /* apv >= 89 */ {
3358 /* we still expect NUL terminated strings */
3359 /* but just in case someone tries to be evil */
3360 D_ASSERT(p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3361 D_ASSERT(p->csums_alg[SHARED_SECRET_MAX-1] == 0);
3362 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3363 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3364 }
3365
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003366 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3368 dev_err(DEV, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003369 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003370 goto disconnect;
3371 }
3372 verify_tfm = drbd_crypto_alloc_digest_safe(mdev,
3373 p->verify_alg, "verify-alg");
3374 if (IS_ERR(verify_tfm)) {
3375 verify_tfm = NULL;
3376 goto disconnect;
3377 }
3378 }
3379
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003380 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003381 if (mdev->state.conn == C_WF_REPORT_PARAMS) {
3382 dev_err(DEV, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003383 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003384 goto disconnect;
3385 }
3386 csums_tfm = drbd_crypto_alloc_digest_safe(mdev,
3387 p->csums_alg, "csums-alg");
3388 if (IS_ERR(csums_tfm)) {
3389 csums_tfm = NULL;
3390 goto disconnect;
3391 }
3392 }
3393
Philipp Reisner813472c2011-05-03 16:47:02 +02003394 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003395 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3396 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3397 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3398 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003399
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003400 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Philipp Reisner9958c852011-05-03 16:19:31 +02003401 if (fifo_size != mdev->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003402 new_plan = fifo_alloc(fifo_size);
3403 if (!new_plan) {
Philipp Reisner778f2712010-07-06 11:14:00 +02003404				dev_err(DEV, "allocation of fifo_buffer failed");
Lars Ellenbergf3990022011-03-23 14:31:09 +01003405 put_ldev(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02003406 goto disconnect;
3407 }
3408 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003409 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003410
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003411 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003412 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3413 if (!new_net_conf) {
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003414 dev_err(DEV, "Allocation of new net_conf failed\n");
3415 goto disconnect;
3416 }
3417
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003418 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003419
3420 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003421 strcpy(new_net_conf->verify_alg, p->verify_alg);
3422 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003423 crypto_free_hash(mdev->tconn->verify_tfm);
3424 mdev->tconn->verify_tfm = verify_tfm;
3425 dev_info(DEV, "using verify-alg: \"%s\"\n", p->verify_alg);
3426 }
3427 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003428 strcpy(new_net_conf->csums_alg, p->csums_alg);
3429 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003430 crypto_free_hash(mdev->tconn->csums_tfm);
3431 mdev->tconn->csums_tfm = csums_tfm;
3432 dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg);
3433 }
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003434 rcu_assign_pointer(tconn->net_conf, new_net_conf);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003435 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003436 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003437
Philipp Reisner813472c2011-05-03 16:47:02 +02003438 if (new_disk_conf) {
3439 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3440 put_ldev(mdev);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003441 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003442
3443 if (new_plan) {
3444 old_plan = mdev->rs_plan_s;
3445 rcu_assign_pointer(mdev->rs_plan_s, new_plan);
3446 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003447
3448 mutex_unlock(&mdev->tconn->conf_update);
3449 synchronize_rcu();
3450 if (new_net_conf)
3451 kfree(old_net_conf);
3452 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003453 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003454
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003455 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456
Philipp Reisner813472c2011-05-03 16:47:02 +02003457reconnect:
3458 if (new_disk_conf) {
3459 put_ldev(mdev);
3460 kfree(new_disk_conf);
3461 }
3462 mutex_unlock(&mdev->tconn->conf_update);
3463 return -EIO;
3464
Philipp Reisnerb411b362009-09-25 16:07:19 -07003465disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003466 kfree(new_plan);
3467 if (new_disk_conf) {
3468 put_ldev(mdev);
3469 kfree(new_disk_conf);
3470 }
Philipp Reisnera0095502011-05-03 13:14:15 +02003471 mutex_unlock(&mdev->tconn->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003472 /* just for completeness: actually not needed,
3473 * as this is not reached if csums_tfm was ok. */
3474 crypto_free_hash(csums_tfm);
3475 /* but free the verify_tfm again, if csums_tfm did not work out */
3476 crypto_free_hash(verify_tfm);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003477 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003478 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003479}
3480
Philipp Reisnerb411b362009-09-25 16:07:19 -07003481/* warn if the arguments differ by more than 12.5% */
3482static void warn_if_differ_considerably(struct drbd_conf *mdev,
3483 const char *s, sector_t a, sector_t b)
3484{
3485 sector_t d;
3486 if (a == 0 || b == 0)
3487 return;
3488 d = (a > b) ? (a - b) : (b - a);
3489 if (d > (a>>3) || d > (b>>3))
3490 dev_warn(DEV, "Considerable difference in %s: %llus vs. %llus\n", s,
3491 (unsigned long long)a, (unsigned long long)b);
3492}
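/*
 * Editorial worked example for warn_if_differ_considerably() above, not part
 * of the original source: with a = 1000 and b = 870 sectors, d = 130 exceeds
 * a>>3 = 125 (one eighth, i.e. 12.5%), so the warning is emitted; with
 * b = 960, d = 40 stays below both a>>3 and b>>3 and nothing is logged.
 */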
3493
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003494static int receive_sizes(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003495{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003496 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003497 struct p_sizes *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003498 enum determine_dev_size dd = unchanged;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003499 sector_t p_size, p_usize, my_usize;
3500 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003501 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003502
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003503 mdev = vnr_to_mdev(tconn, pi->vnr);
3504 if (!mdev)
3505 return config_unknown_volume(tconn, pi);
3506
Philipp Reisnerb411b362009-09-25 16:07:19 -07003507 p_size = be64_to_cpu(p->d_size);
3508 p_usize = be64_to_cpu(p->u_size);
3509
Philipp Reisnerb411b362009-09-25 16:07:19 -07003510 /* just store the peer's disk size for now.
3511 * we still need to figure out whether we accept that. */
3512 mdev->p_size = p_size;
3513
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514 if (get_ldev(mdev)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003515 rcu_read_lock();
3516 my_usize = rcu_dereference(mdev->ldev->disk_conf)->disk_size;
3517 rcu_read_unlock();
3518
Philipp Reisnerb411b362009-09-25 16:07:19 -07003519 warn_if_differ_considerably(mdev, "lower level device sizes",
3520 p_size, drbd_get_max_capacity(mdev->ldev));
3521 warn_if_differ_considerably(mdev, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003522 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003523
3524 /* if this is the first connect, or an otherwise expected
3525 * param exchange, choose the minimum */
3526 if (mdev->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003527 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528
3529 /* Never shrink a device with usable data during connect.
3530 But allow online shrinking if we are connected. */
Philipp Reisneref5e44a2011-05-03 13:27:43 +02003531 if (drbd_new_dev_size(mdev, mdev->ldev, p_usize, 0) <
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003532 drbd_get_capacity(mdev->this_bdev) &&
3533 mdev->state.disk >= D_OUTDATED &&
3534 mdev->state.conn < C_CONNECTED) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003535 dev_err(DEV, "The peer's disk size is too small!\n");
Philipp Reisner38fa9982011-03-15 18:24:49 +01003536 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003537 put_ldev(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003538 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003539 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003540
3541 if (my_usize != p_usize) {
3542 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3543
3544 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3545 if (!new_disk_conf) {
3546 dev_err(DEV, "Allocation of new disk_conf failed\n");
3547 put_ldev(mdev);
3548 return -ENOMEM;
3549 }
3550
3551 mutex_lock(&mdev->tconn->conf_update);
3552 old_disk_conf = mdev->ldev->disk_conf;
3553 *new_disk_conf = *old_disk_conf;
3554 new_disk_conf->disk_size = p_usize;
3555
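		/* Publish the new disk_conf via RCU; synchronize_rcu() below
		 * guarantees no reader still holds a reference to the old
		 * disk_conf before it is freed. */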
3556 rcu_assign_pointer(mdev->ldev->disk_conf, new_disk_conf);
3557 mutex_unlock(&mdev->tconn->conf_update);
3558 synchronize_rcu();
3559 kfree(old_disk_conf);
3560
3561 dev_info(DEV, "Peer sets u_size to %lu sectors\n",
3562 (unsigned long)p_usize);
3563 }
3564
Philipp Reisnerb411b362009-09-25 16:07:19 -07003565 put_ldev(mdev);
3566 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003567
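	/* With a local disk attached, recompute our device size using the dds
	 * flags the peer sent; a diskless node simply adopts the peer's size. */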
Philipp Reisnere89b5912010-03-24 17:11:33 +01003568 ddsf = be16_to_cpu(p->dds_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003569 if (get_ldev(mdev)) {
Bart Van Assche24c48302011-05-21 18:32:29 +02003570 dd = drbd_determine_dev_size(mdev, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003571 put_ldev(mdev);
3572 if (dd == dev_size_error)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003573 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003574 drbd_md_sync(mdev);
3575 } else {
3576 /* I am diskless, need to accept the peer's size. */
3577 drbd_set_my_capacity(mdev, p_size);
3578 }
3579
Philipp Reisner99432fc2011-05-20 16:39:13 +02003580 mdev->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3581 drbd_reconsider_max_bio_size(mdev);
3582
Philipp Reisnerb411b362009-09-25 16:07:19 -07003583 if (get_ldev(mdev)) {
3584 if (mdev->ldev->known_size != drbd_get_capacity(mdev->ldev->backing_bdev)) {
3585 mdev->ldev->known_size = drbd_get_capacity(mdev->ldev->backing_bdev);
3586 ldsc = 1;
3587 }
3588
Philipp Reisnerb411b362009-09-25 16:07:19 -07003589 put_ldev(mdev);
3590 }
3591
3592 if (mdev->state.conn > C_WF_REPORT_PARAMS) {
3593 if (be64_to_cpu(p->c_size) !=
3594 drbd_get_capacity(mdev->this_bdev) || ldsc) {
3595 /* we have different sizes, probably peer
3596 * needs to know my new size... */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003597 drbd_send_sizes(mdev, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598 }
3599 if (test_and_clear_bit(RESIZE_PENDING, &mdev->flags) ||
3600 (dd == grew && mdev->state.conn == C_CONNECTED)) {
3601 if (mdev->state.pdsk >= D_INCONSISTENT &&
Philipp Reisnere89b5912010-03-24 17:11:33 +01003602 mdev->state.disk >= D_INCONSISTENT) {
3603 if (ddsf & DDSF_NO_RESYNC)
3604 dev_info(DEV, "Resync of new storage suppressed with --assume-clean\n");
3605 else
3606 resync_after_online_grow(mdev);
3607 } else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003608 set_bit(RESYNC_AFTER_NEG, &mdev->flags);
3609 }
3610 }
3611
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003612 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003613}
3614
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003615static int receive_uuids(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003616{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003617 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003618 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003620 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003622 mdev = vnr_to_mdev(tconn, pi->vnr);
3623 if (!mdev)
3624 return config_unknown_volume(tconn, pi);
3625
Philipp Reisnerb411b362009-09-25 16:07:19 -07003626 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
	if (!p_uuid) {
		dev_err(DEV, "kmalloc of p_uuid failed\n");
		return -ENOMEM;
	}
3627
3628 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3629 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3630
3631 kfree(mdev->p_uuid);
3632 mdev->p_uuid = p_uuid;
3633
3634 if (mdev->state.conn < C_CONNECTED &&
3635 mdev->state.disk < D_INCONSISTENT &&
3636 mdev->state.role == R_PRIMARY &&
3637 (mdev->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
3638 dev_err(DEV, "Can only connect to data with current UUID=%016llX\n",
3639 (unsigned long long)mdev->ed_uuid);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003640 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003641 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642 }
3643
3644 if (get_ldev(mdev)) {
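		/* Adopt the peer's current UUID without a full sync, but only if
		 * we are connected, the peer speaks protocol 90 or newer, our own
		 * current UUID is still the "just created" one, and the peer set
		 * bit 3 of its UUID flags to request skipping the initial sync. */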
3645 int skip_initial_sync =
3646 mdev->state.conn == C_CONNECTED &&
Philipp Reisner31890f42011-01-19 14:12:51 +01003647 mdev->tconn->agreed_pro_version >= 90 &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003648 mdev->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
3649 (p_uuid[UI_FLAGS] & 8);
3650 if (skip_initial_sync) {
3651 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3652 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003653 "clear_n_write from receive_uuids",
3654 BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003655 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3656 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3657 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
3658 CS_VERBOSE, NULL);
3659 drbd_md_sync(mdev);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003660 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 }
3662 put_ldev(mdev);
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003663 } else if (mdev->state.disk < D_INCONSISTENT &&
3664 mdev->state.role == R_PRIMARY) {
3665 /* I am a diskless primary, the peer just created a new current UUID
3666 for me. */
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003667 updated_uuids = drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668 }
3669
3670 /* Before we test for the disk state, we should wait until a possibly
3671 ongoing cluster-wide state change has finished. That is important if
3672 we are primary and are detaching from our disk. We need to see the
3673 new disk state... */
Philipp Reisner8410da82011-02-11 20:11:10 +01003674 mutex_lock(mdev->state_mutex);
3675 mutex_unlock(mdev->state_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676 if (mdev->state.conn >= C_CONNECTED && mdev->state.disk < D_INCONSISTENT)
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003677 updated_uuids |= drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]);
3678
3679 if (updated_uuids)
3680 drbd_print_uuids(mdev, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003682 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003683}
3684
3685/**
3686 * convert_state() - Converts the peer's view of the cluster state to our point of view
3687 * @ps: The state as seen by the peer.
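 *
 * Roles and disk states are swapped (peer <-> self), and asymmetric
 * connection states (e.g. StartingSyncS/StartingSyncT, VerifyS/VerifyT)
 * are mapped to their counterparts via c_tab.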
3688 */
3689static union drbd_state convert_state(union drbd_state ps)
3690{
3691 union drbd_state ms;
3692
3693 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003694 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003695 [C_CONNECTED] = C_CONNECTED,
3696
3697 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3698 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3699 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3700 [C_VERIFY_S] = C_VERIFY_T,
3701 [C_MASK] = C_MASK,
3702 };
3703
3704 ms.i = ps.i;
3705
3706 ms.conn = c_tab[ps.conn];
3707 ms.peer = ps.role;
3708 ms.role = ps.peer;
3709 ms.pdsk = ps.disk;
3710 ms.disk = ps.pdsk;
3711 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3712
3713 return ms;
3714}
3715
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003716static int receive_req_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003717{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003718 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003719 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003721 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003722
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003723 mdev = vnr_to_mdev(tconn, pi->vnr);
3724 if (!mdev)
3725 return -EIO;
3726
Philipp Reisnerb411b362009-09-25 16:07:19 -07003727 mask.i = be32_to_cpu(p->mask);
3728 val.i = be32_to_cpu(p->val);
3729
Philipp Reisner25703f82011-02-07 14:35:25 +01003730 if (test_bit(DISCARD_CONCURRENT, &mdev->tconn->flags) &&
Philipp Reisner8410da82011-02-11 20:11:10 +01003731 mutex_is_locked(mdev->state_mutex)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732 drbd_send_sr_reply(mdev, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003733 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003734 }
3735
3736 mask = convert_state(mask);
3737 val = convert_state(val);
3738
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003739 rv = drbd_change_state(mdev, CS_VERBOSE, mask, val);
3740 drbd_send_sr_reply(mdev, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741
Philipp Reisnerb411b362009-09-25 16:07:19 -07003742 drbd_md_sync(mdev);
3743
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003744 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003745}
3746
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003747static int receive_req_conn_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003748{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003749 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003750 union drbd_state mask, val;
3751 enum drbd_state_rv rv;
3752
3753 mask.i = be32_to_cpu(p->mask);
3754 val.i = be32_to_cpu(p->val);
3755
3756 if (test_bit(DISCARD_CONCURRENT, &tconn->flags) &&
3757 mutex_is_locked(&tconn->cstate_mutex)) {
3758 conn_send_sr_reply(tconn, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003759 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003760 }
3761
3762 mask = convert_state(mask);
3763 val = convert_state(val);
3764
Philipp Reisner778bcf22011-03-28 12:55:03 +02003765 rv = conn_request_state(tconn, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003766 conn_send_sr_reply(tconn, rv);
3767
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003768 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003769}
3770
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003771static int receive_state(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003773 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003774 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003775 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003777 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003778 int rv;
3779
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003780 mdev = vnr_to_mdev(tconn, pi->vnr);
3781 if (!mdev)
3782 return config_unknown_volume(tconn, pi);
3783
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784 peer_state.i = be32_to_cpu(p->state);
3785
3786 real_peer_disk = peer_state.disk;
3787 if (peer_state.disk == D_NEGOTIATING) {
3788 real_peer_disk = mdev->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
3789 dev_info(DEV, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
3790 }
3791
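	/* Snapshot our current state; the request lock is dropped while we
	 * evaluate it, so the check against drbd_read_state() further down
	 * jumps back to "retry" if the state changed in the meantime. */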
Philipp Reisner87eeee42011-01-19 14:16:30 +01003792 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003793 retry:
Philipp Reisner78bae592011-03-28 15:40:12 +02003794 os = ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003795 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003796
Philipp Reisnerb8853db2011-12-13 11:09:16 +01003797 /* If some other part of the code (asender thread, timeout)
3798 * already decided to close the connection again,
3799 * we must not "re-establish" it here. */
3800 if (os.conn <= C_TEAR_DOWN)
3801 return false;
3802
Philipp Reisner9bcd2522011-09-29 13:00:14 +02003803 /* If this is the "end of sync" confirmation, usually the peer disk
3804 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3805 * set) resync started in PausedSyncT, or if the timing of pause-/
3806 * unpause-sync events has been "just right", the peer disk may
3807 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3808 */
3809 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3810 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003811 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3812 /* If we are (becoming) SyncSource, but peer is still in sync
3813 * preparation, ignore its uptodate-ness to avoid flapping, it
3814 * will change to inconsistent once the peer reaches active
3815 * syncing states.
3816 * It may have changed syncer-paused flags, however, so we
3817 * cannot ignore this completely. */
3818 if (peer_state.conn > C_CONNECTED &&
3819 peer_state.conn < C_SYNC_SOURCE)
3820 real_peer_disk = D_INCONSISTENT;
3821
3822 /* if peer_state changes to connected at the same time,
3823 * it explicitly notifies us that it finished resync.
3824 * Maybe we should finish it up, too? */
3825 else if (os.conn >= C_SYNC_SOURCE &&
3826 peer_state.conn == C_CONNECTED) {
3827 if (drbd_bm_total_weight(mdev) <= mdev->rs_failed)
3828 drbd_resync_finished(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003829 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003830 }
3831 }
3832
3833 /* peer says his disk is inconsistent, while we think it is uptodate,
3834 * and this happens while the peer still thinks we have a sync going on,
3835 * but we think we are already done with the sync.
3836 * We ignore this to avoid flapping pdsk.
3837 * This should not happen, if the peer is a recent version of drbd. */
3838 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3839 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3840 real_peer_disk = D_UP_TO_DATE;
3841
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003842 if (ns.conn == C_WF_REPORT_PARAMS)
3843 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844
Philipp Reisner67531712010-10-27 12:21:30 +02003845 if (peer_state.conn == C_AHEAD)
3846 ns.conn = C_BEHIND;
3847
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848 if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3849 get_ldev_if_state(mdev, D_NEGOTIATING)) {
3850 int cr; /* consider resync */
3851
3852 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003853 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003854 /* if we had an established connection
3855 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003856 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003857 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003858 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003859 /* if we have both been inconsistent, and the peer has been
3860 * forced to be UpToDate with --overwrite-data */
3861 cr |= test_bit(CONSIDER_RESYNC, &mdev->flags);
3862 /* if we had been plain connected, and the admin requested to
3863 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003864 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003865 (peer_state.conn >= C_STARTING_SYNC_S &&
3866 peer_state.conn <= C_WF_BITMAP_T));
3867
3868 if (cr)
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003869 ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003870
3871 put_ldev(mdev);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003872 if (ns.conn == C_MASK) {
3873 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003874 if (mdev->state.disk == D_NEGOTIATING) {
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02003875 drbd_force_state(mdev, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003876 } else if (peer_state.disk == D_NEGOTIATING) {
3877 dev_err(DEV, "Disk attach process on the peer node was aborted.\n");
3878 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003879 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003880 } else {
Philipp Reisner8169e412011-03-15 18:40:27 +01003881 if (test_and_clear_bit(CONN_DRY_RUN, &mdev->tconn->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003882 return -EIO;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003883 D_ASSERT(os.conn == C_WF_REPORT_PARAMS);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003884 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003885 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003886 }
3887 }
3888 }
3889
Philipp Reisner87eeee42011-01-19 14:16:30 +01003890 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisner78bae592011-03-28 15:40:12 +02003891 if (os.i != drbd_read_state(mdev).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003892 goto retry;
3893 clear_bit(CONSIDER_RESYNC, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003894 ns.peer = peer_state.role;
3895 ns.pdsk = real_peer_disk;
3896 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003897 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003898 ns.disk = mdev->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003899 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Philipp Reisner2aebfab2011-03-28 16:48:11 +02003900 if (ns.pdsk == D_CONSISTENT && drbd_suspended(mdev) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
Philipp Reisner481c6f52010-06-22 14:03:27 +02003901 test_bit(NEW_CUR_UUID, &mdev->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01003902 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02003903 for temporary network outages! */
Philipp Reisner87eeee42011-01-19 14:16:30 +01003904 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003905 dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01003906 tl_clear(mdev->tconn);
Philipp Reisner481c6f52010-06-22 14:03:27 +02003907 drbd_uuid_new_current(mdev);
3908 clear_bit(NEW_CUR_UUID, &mdev->flags);
Philipp Reisner38fa9982011-03-15 18:24:49 +01003909 conn_request_state(mdev->tconn, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003910 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02003911 }
Philipp Reisner65d922c2010-06-16 16:18:09 +02003912 rv = _drbd_set_state(mdev, ns, cs_flags, NULL);
Philipp Reisner78bae592011-03-28 15:40:12 +02003913 ns = drbd_read_state(mdev);
Philipp Reisner87eeee42011-01-19 14:16:30 +01003914 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915
3916 if (rv < SS_SUCCESS) {
Philipp Reisner38fa9982011-03-15 18:24:49 +01003917 conn_request_state(mdev->tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003918 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003919 }
3920
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003921 if (os.conn > C_WF_REPORT_PARAMS) {
3922 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003923 peer_state.disk != D_NEGOTIATING ) {
3924 /* we want resync, peer has not yet decided to sync... */
3925 /* Nowadays only used when forcing a node into primary role and
3926 setting its disk to UpToDate with that */
3927 drbd_send_uuids(mdev);
Philipp Reisner43de7c82011-11-10 13:16:13 +01003928 drbd_send_current_state(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003929 }
3930 }
3931
Philipp Reisner08b165b2011-09-05 16:22:33 +02003932 clear_bit(DISCARD_MY_DATA, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003933
3934 drbd_md_sync(mdev); /* update connected indicator, la_size, ... */
3935
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003936 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003937}
3938
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003939static int receive_sync_uuid(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003940{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003941 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003942 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003943
3944 mdev = vnr_to_mdev(tconn, pi->vnr);
3945 if (!mdev)
3946 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003947
3948 wait_event(mdev->misc_wait,
3949 mdev->state.conn == C_WF_SYNC_UUID ||
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003950 mdev->state.conn == C_BEHIND ||
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951 mdev->state.conn < C_CONNECTED ||
3952 mdev->state.disk < D_NEGOTIATING);
3953
3954 /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */
3955
Philipp Reisnerb411b362009-09-25 16:07:19 -07003956 /* Here the _drbd_uuid_ functions are right, current should
3957 _not_ be rotated into the history */
3958 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
3959 _drbd_uuid_set(mdev, UI_CURRENT, be64_to_cpu(p->uuid));
3960 _drbd_uuid_set(mdev, UI_BITMAP, 0UL);
3961
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003962 drbd_print_uuids(mdev, "updated sync uuid");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003963 drbd_start_resync(mdev, C_SYNC_TARGET);
3964
3965 put_ldev(mdev);
3966 } else
3967 dev_err(DEV, "Ignoring SyncUUID packet!\n");
3968
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003969 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003970}
3971
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003972/**
3973 * receive_bitmap_plain
3974 *
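 * Receive one chunk of an uncompressed bitmap transfer and merge the
 * received little-endian long words into the local bitmap.
 *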
3975 * Return 0 when done, 1 when another iteration is needed, and a negative error
3976 * code upon failure.
3977 */
3978static int
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02003979receive_bitmap_plain(struct drbd_conf *mdev, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003980 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02003982 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
3983 drbd_header_size(mdev->tconn);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003984 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02003985 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003986 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003987 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003988
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02003989 if (want != size) {
3990 dev_err(DEV, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003991 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003992 }
3993 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003994 return 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003995 err = drbd_recv_all(mdev->tconn, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003996 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01003997 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003998
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003999 drbd_bm_merge_lel(mdev, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000
4001 c->word_offset += num_words;
4002 c->bit_offset = c->word_offset * BITS_PER_LONG;
4003 if (c->bit_offset > c->bm_bits)
4004 c->bit_offset = c->bm_bits;
4005
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004006 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007}
4008
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004009static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4010{
4011 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4012}
4013
4014static int dcbp_get_start(struct p_compressed_bm *p)
4015{
4016 return (p->encoding & 0x80) != 0;
4017}
4018
4019static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4020{
4021 return (p->encoding >> 4) & 0x7;
4022}
4023
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004024/**
4025 * recv_bm_rle_bits
4026 *
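 * Decode one compressed bitmap chunk: the payload is a bitstream of
 * VLI-encoded run lengths describing alternating runs of clear and set
 * bits; runs of set bits are merged into the local bitmap.
 *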
4027 * Return 0 when done, 1 when another iteration is needed, and a negative error
4028 * code upon failure.
4029 */
4030static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031recv_bm_rle_bits(struct drbd_conf *mdev,
4032 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004033 struct bm_xfer_ctx *c,
4034 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004035{
4036 struct bitstream bs;
4037 u64 look_ahead;
4038 u64 rl;
4039 u64 tmp;
4040 unsigned long s = c->bit_offset;
4041 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004042 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043 int have;
4044 int bits;
4045
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004046 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047
4048 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4049 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004050 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004051
4052 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4053 bits = vli_decode_bits(&rl, look_ahead);
4054 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004055 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004056
4057 if (toggle) {
4058 e = s + rl -1;
4059 if (e >= c->bm_bits) {
4060 dev_err(DEV, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004061 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004062 }
4063 _drbd_bm_set_bits(mdev, s, e);
4064 }
4065
4066 if (have < bits) {
4067 dev_err(DEV, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
4068 have, bits, look_ahead,
4069 (unsigned int)(bs.cur.b - p->code),
4070 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004071 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004072 }
4073 look_ahead >>= bits;
4074 have -= bits;
4075
4076 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4077 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004078 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004079 look_ahead |= tmp << have;
4080 have += bits;
4081 }
4082
4083 c->bit_offset = s;
4084 bm_xfer_ctx_bit_to_word_offset(c);
4085
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004086 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004087}
4088
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004089/**
4090 * decode_bitmap_c
4091 *
4092 * Return 0 when done, 1 when another iteration is needed, and a negative error
4093 * code upon failure.
4094 */
4095static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07004096decode_bitmap_c(struct drbd_conf *mdev,
4097 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004098 struct bm_xfer_ctx *c,
4099 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004100{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004101 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004102 return recv_bm_rle_bits(mdev, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103
4104 /* other variants had been implemented for evaluation,
4105 * but have been dropped as this one turned out to be "best"
4106 * during all our tests. */
4107
4108 dev_err(DEV, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
Philipp Reisner38fa9982011-03-15 18:24:49 +01004109 conn_request_state(mdev->tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004110 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111}
4112
4113void INFO_bm_xfer_stats(struct drbd_conf *mdev,
4114 const char *direction, struct bm_xfer_ctx *c)
4115{
4116 /* what would it take to transfer it "plaintext" */
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004117 unsigned int header_size = drbd_header_size(mdev->tconn);
4118 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4119 unsigned int plain =
4120 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4121 c->bm_words * sizeof(unsigned long);
4122 unsigned int total = c->bytes[0] + c->bytes[1];
4123 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004124
4125 /* total can not be zero. but just in case: */
4126 if (total == 0)
4127 return;
4128
4129 /* don't report if not compressed */
4130 if (total >= plain)
4131 return;
4132
4133 /* total < plain. check for overflow, still */
4134 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4135 : (1000 * total / plain);
4136
4137 if (r > 1000)
4138 r = 1000;
4139
4140 r = 1000 - r;
4141 dev_info(DEV, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
4142 "total %u; compression: %u.%u%%\n",
4143 direction,
4144 c->bytes[1], c->packets[1],
4145 c->bytes[0], c->packets[0],
4146 total, r/10, r % 10);
4147}
4148
4149/* Since we are processing the bitfield from lower addresses to higher,
4150 it does not matter whether we process it in 32 bit chunks or 64 bit
4151 chunks as long as it is little endian. (Understand it as byte stream,
4152 beginning with the lowest byte...) If we used big endian
4153 we would need to process it from the highest address to the lowest,
4154 in order to be agnostic to the 32 vs 64 bits issue.
4155
4156 Returns 0 on success, a negative error code otherwise. */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004157static int receive_bitmap(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004159 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004161 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004162
4163 mdev = vnr_to_mdev(tconn, pi->vnr);
4164 if (!mdev)
4165 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004167 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
4168 /* you are supposed to send additional out-of-sync information
4169 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004170
Philipp Reisnerb411b362009-09-25 16:07:19 -07004171 c = (struct bm_xfer_ctx) {
4172 .bm_bits = drbd_bm_bits(mdev),
4173 .bm_words = drbd_bm_words(mdev),
4174 };
4175
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004176 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004177 if (pi->cmd == P_BITMAP)
4178 err = receive_bitmap_plain(mdev, pi->size, pi->data, &c);
4179 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180 /* MAYBE: sanity check that we speak proto >= 90,
4181 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004182 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004184 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(tconn)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185 dev_err(DEV, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004186 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004187 goto out;
4188 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004189 if (pi->size <= sizeof(*p)) {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004190 dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004191 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004192 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004194 err = drbd_recv_all(mdev->tconn, p, pi->size);
4195 if (err)
4196 goto out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004197 err = decode_bitmap_c(mdev, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004198 } else {
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004199 dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004200 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004201 goto out;
4202 }
4203
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004204 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004205 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(tconn) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004206
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004207 if (err <= 0) {
4208 if (err < 0)
4209 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004211 }
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004212 err = drbd_recv_header(mdev->tconn, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004213 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004214 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004215 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216
4217 INFO_bm_xfer_stats(mdev, "receive", &c);
4218
4219 if (mdev->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004220 enum drbd_state_rv rv;
4221
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004222 err = drbd_send_bitmap(mdev);
4223 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004224 goto out;
4225 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004226 rv = _drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
4227 D_ASSERT(rv == SS_SUCCESS);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228 } else if (mdev->state.conn != C_WF_BITMAP_S) {
4229 /* admin may have requested C_DISCONNECTING,
4230 * other threads may have noticed network errors */
4231 dev_info(DEV, "unexpected cstate (%s) in receive_bitmap\n",
4232 drbd_conn_str(mdev->state.conn));
4233 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004234 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004235
Philipp Reisnerb411b362009-09-25 16:07:19 -07004236 out:
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004237 drbd_bm_unlock(mdev);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004238 if (!err && mdev->state.conn == C_WF_BITMAP_S)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004239 drbd_start_resync(mdev, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004240 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241}
4242
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004243static int receive_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004244{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004245 conn_warn(tconn, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004246 pi->cmd, pi->size);
Philipp Reisner2de876e2011-03-15 14:38:01 +01004247
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004248 return ignore_remaining_packet(tconn, pi);
Philipp Reisner2de876e2011-03-15 14:38:01 +01004249}
4250
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004251static int receive_UnplugRemote(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004253 /* Make sure we've acked all the TCP data associated
4254 * with the data requests being unplugged */
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004255 drbd_tcp_quickack(tconn->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004257 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004258}
4259
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004260static int receive_out_of_sync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004261{
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004262 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004263 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004264
4265 mdev = vnr_to_mdev(tconn, pi->vnr);
4266 if (!mdev)
4267 return -EIO;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004268
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004269 switch (mdev->state.conn) {
4270 case C_WF_SYNC_UUID:
4271 case C_WF_BITMAP_T:
4272 case C_BEHIND:
4273 break;
4274 default:
4275 dev_err(DEV, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
4276 drbd_conn_str(mdev->state.conn));
4277 }
4278
Philipp Reisner73a01a12010-10-27 14:33:00 +02004279 drbd_set_out_of_sync(mdev, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
4280
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004281 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004282}
4283
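/* Dispatch table for the receiver: for every data-socket packet type this
 * records whether a variable-size payload may follow the fixed part
 * (expect_payload), how large that fixed part is (pkt_size), and the
 * handler function to call. */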
Philipp Reisner02918be2010-08-20 14:35:10 +02004284struct data_cmd {
4285 int expect_payload;
4286 size_t pkt_size;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004287 int (*fn)(struct drbd_tconn *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288};
4289
Philipp Reisner02918be2010-08-20 14:35:10 +02004290static struct data_cmd drbd_cmd_handler[] = {
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004291 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4292 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4293 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4294 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004295 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4296 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4297 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004298 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4299 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004300 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4301 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004302 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4303 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4304 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4305 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4306 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4307 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4308 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4309 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4310 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4311 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
4312 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
4313 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004314 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004315};
4316
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004317static void drbdd(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004318{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004319 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004320 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004321 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322
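	/* Main receive loop: read a packet header, look up the handler in
	 * drbd_cmd_handler[], pull in the fixed-size part of the packet, and
	 * let the handler consume any remaining payload. Any error tears the
	 * connection down via C_PROTOCOL_ERROR. */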
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004323 while (get_t_state(&tconn->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004324 struct data_cmd *cmd;
4325
Philipp Reisnereefc2f72011-02-08 12:55:24 +01004326 drbd_thread_current_set_cpu(&tconn->receiver);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004327 if (drbd_recv_header(tconn, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004328 goto err_out;
4329
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004330 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004331 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004332 conn_err(tconn, "Unexpected data packet %s (0x%04x)",
4333 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004334 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004335 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004336
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004337 shs = cmd->pkt_size;
4338 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004339 conn_err(tconn, "No payload expected %s l:%d\n",
4340 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004341 goto err_out;
4342 }
4343
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004344 if (shs) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004345 err = drbd_recv_all_warn(tconn, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004346 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004347 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004348 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004349 }
4350
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004351 err = cmd->fn(tconn, &pi);
4352 if (err) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004353 conn_err(tconn, "error receiving %s, e: %d l: %d!\n",
4354 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004355 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004357 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004358 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004359
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004360 err_out:
4361 conn_request_state(tconn, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004362}
4363
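/* Queue a barrier work item on the connection's worker and wait for it to
 * complete; this guarantees that all work queued before the call has been
 * processed. */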
Philipp Reisner0e29d162011-02-18 14:23:11 +01004364void conn_flush_workqueue(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004365{
4366 struct drbd_wq_barrier barr;
4367
4368 barr.w.cb = w_prev_work_done;
Philipp Reisner0e29d162011-02-18 14:23:11 +01004369 barr.w.tconn = tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004370 init_completion(&barr.done);
Philipp Reisner0e29d162011-02-18 14:23:11 +01004371 drbd_queue_work(&tconn->data.work, &barr.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 wait_for_completion(&barr.done);
4373}
4374
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004375static void conn_disconnect(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004376{
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004377 struct drbd_conf *mdev;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004378 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004379 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004381 if (tconn->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383
Philipp Reisnerb8853db2011-12-13 11:09:16 +01004384 /* We are about to start the cleanup after connection loss.
4385 * Make sure drbd_make_request knows about that.
4386 * Usually we should be in some network failure state already,
4387 * but just in case we are not, we fix it up here.
4388 */
4389 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
4390
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391 /* asender does not clean up anything. it must not interfere, either */
Philipp Reisner360cc742011-02-08 14:29:53 +01004392 drbd_thread_stop(&tconn->asender);
4393 drbd_free_sock(tconn);
4394
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004395 rcu_read_lock();
4396 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
4397 kref_get(&mdev->kref);
4398 rcu_read_unlock();
4399 drbd_disconnected(mdev);
4400 kref_put(&mdev->kref, &drbd_minor_destroy);
4401 rcu_read_lock();
4402 }
4403 rcu_read_unlock();
4404
Philipp Reisner12038a32011-11-09 19:18:00 +01004405 if (!list_empty(&tconn->current_epoch->list))
4406 conn_err(tconn, "ASSERTION FAILED: tconn->current_epoch->list not empty\n");
4407 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
4408 atomic_set(&tconn->current_epoch->epoch_size, 0);
4409
Philipp Reisner360cc742011-02-08 14:29:53 +01004410 conn_info(tconn, "Connection closed\n");
4411
Philipp Reisnercb703452011-03-24 11:03:07 +01004412 if (conn_highest_role(tconn) == R_PRIMARY && conn_highest_pdsk(tconn) >= D_UNKNOWN)
4413 conn_try_outdate_peer_async(tconn);
4414
Philipp Reisner360cc742011-02-08 14:29:53 +01004415 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004416 oc = tconn->cstate;
4417 if (oc >= C_UNCONNECTED)
Philipp Reisner376694a2011-11-07 10:54:28 +01004418 _conn_request_state(tconn, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004419
Philipp Reisner360cc742011-02-08 14:29:53 +01004420 spin_unlock_irq(&tconn->req_lock);
4421
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004422 if (oc == C_DISCONNECTING)
Lars Ellenbergd9cc6e22011-04-27 10:25:28 +02004423 conn_request_state(tconn, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004424}
4425
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004426static int drbd_disconnected(struct drbd_conf *mdev)
Philipp Reisner360cc742011-02-08 14:29:53 +01004427{
Philipp Reisner360cc742011-02-08 14:29:53 +01004428 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429
Philipp Reisner85719572010-07-21 10:20:17 +02004430 /* wait for current activity to cease. */
Philipp Reisner87eeee42011-01-19 14:16:30 +01004431 spin_lock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432 _drbd_wait_ee_list_empty(mdev, &mdev->active_ee);
4433 _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee);
4434 _drbd_wait_ee_list_empty(mdev, &mdev->read_ee);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004435 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436
4437 /* We do not have data structures that would allow us to
4438 * get the rs_pending_cnt down to 0 again.
4439 * * On C_SYNC_TARGET we do not have any data structures describing
4440 * the pending RSDataRequest's we have sent.
4441 * * On C_SYNC_SOURCE there is no data structure that tracks
4442 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4443 * And no, it is not the sum of the reference counts in the
4444 * resync_LRU. The resync_LRU tracks the whole operation including
4445 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4446 * on the fly. */
4447 drbd_rs_cancel_all(mdev);
4448 mdev->rs_total = 0;
4449 mdev->rs_failed = 0;
4450 atomic_set(&mdev->rs_pending_cnt, 0);
4451 wake_up(&mdev->misc_wait);
4452
Philipp Reisnerb411b362009-09-25 16:07:19 -07004453 del_timer_sync(&mdev->resync_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004454 resync_timer_fn((unsigned long)mdev);
4455
Philipp Reisnerb411b362009-09-25 16:07:19 -07004456 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4457 * w_make_resync_request etc. which may still be on the worker queue
4458 * to be "canceled" */
Philipp Reisnera21e9292011-02-08 15:08:49 +01004459 drbd_flush_workqueue(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460
Andreas Gruenbachera990be42011-04-06 17:56:48 +02004461 drbd_finish_peer_reqs(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004462
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004463 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4464 might have queued new work. The one before drbd_finish_peer_reqs() is
4465 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
4466 drbd_flush_workqueue(mdev);
4467
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468 kfree(mdev->p_uuid);
4469 mdev->p_uuid = NULL;
4470
Philipp Reisner2aebfab2011-03-28 16:48:11 +02004471 if (!drbd_suspended(mdev))
Philipp Reisner2f5cdd02011-02-21 14:29:27 +01004472 tl_clear(mdev->tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004473
Philipp Reisnerb411b362009-09-25 16:07:19 -07004474 drbd_md_sync(mdev);
4475
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004476 /* serialize with bitmap writeout triggered by the state change,
4477 * if any. */
4478 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
4479
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480 /* tcp_close and release of sendpage pages can be deferred. I don't
4481 * want to use SO_LINGER, because apparently it can be deferred for
4482 * more than 20 seconds (longest time I checked).
4483 *
4484 * Actually we don't care for exactly when the network stack does its
4485 * put_page(), but release our reference on these pages right here.
4486 */
Andreas Gruenbacher7721f562011-04-06 17:14:02 +02004487 i = drbd_free_peer_reqs(mdev, &mdev->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004488 if (i)
4489 dev_info(DEV, "net_ee not empty, killed %u entries\n", i);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004490 i = atomic_read(&mdev->pp_in_use_by_net);
4491 if (i)
4492 dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493 i = atomic_read(&mdev->pp_in_use);
4494 if (i)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02004495 dev_info(DEV, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004496
4497 D_ASSERT(list_empty(&mdev->read_ee));
4498 D_ASSERT(list_empty(&mdev->active_ee));
4499 D_ASSERT(list_empty(&mdev->sync_ee));
4500 D_ASSERT(list_empty(&mdev->done_ee));
4501
Philipp Reisner360cc742011-02-08 14:29:53 +01004502 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004503}
4504
4505/*
4506 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4507 * we can agree on is stored in agreed_pro_version.
4508 *
4509 * feature flags and the reserved array should be enough room for future
4510 * enhancements of the handshake protocol, and possible plugins...
4511 *
4512 * for now, they are expected to be zero, but ignored.
4513 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004514static int drbd_send_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004515{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004516 struct drbd_socket *sock;
4517 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004518
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004519 sock = &tconn->data;
4520 p = conn_prepare_command(tconn, sock);
4521 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004522 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004523 memset(p, 0, sizeof(*p));
4524 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4525 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004526 return conn_send_command(tconn, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004527}
4528
4529/*
4530 * return values:
4531 * 1 yes, we have a valid connection
4532 * 0 oops, did not work out, please try again
4533 * -1 peer talks different language,
4534 * no point in trying again, please go standalone.
4535 */
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004536static int drbd_do_features(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537{
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004538 /* ASSERT current == tconn->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004539 struct p_connection_features *p;
4540 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004541 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004542 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004543
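	/* Exchange P_CONNECTION_FEATURES packets and agree on the highest
	 * protocol version supported by both peers. */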
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004544 err = drbd_send_features(tconn);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004545 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004546 return 0;
4547
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004548 err = drbd_recv_header(tconn, &pi);
4549 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004550 return 0;
4551
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004552 if (pi.cmd != P_CONNECTION_FEATURES) {
4553 conn_err(tconn, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004554 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555 return -1;
4556 }
4557
Philipp Reisner77351055b2011-02-07 17:24:26 +01004558 if (pi.size != expect) {
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004559 conn_err(tconn, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004560 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004561 return -1;
4562 }
4563
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004564 p = pi.data;
4565 err = drbd_recv_all_warn(tconn, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004566 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004567 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004568
Philipp Reisnerb411b362009-09-25 16:07:19 -07004569 p->protocol_min = be32_to_cpu(p->protocol_min);
4570 p->protocol_max = be32_to_cpu(p->protocol_max);
4571 if (p->protocol_max == 0)
4572 p->protocol_max = p->protocol_min;
4573
4574 if (PRO_VERSION_MAX < p->protocol_min ||
4575 PRO_VERSION_MIN > p->protocol_max)
4576 goto incompat;
4577
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004578 tconn->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004579
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004580 conn_info(tconn, "Handshake successful: "
4581 "Agreed network protocol version %d\n", tconn->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582
4583 return 1;
4584
4585 incompat:
Philipp Reisner65d11ed2011-02-07 17:35:59 +01004586 conn_err(tconn, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 "I support %d-%d, peer supports %d-%d\n",
4588 PRO_VERSION_MIN, PRO_VERSION_MAX,
4589 p->protocol_min, p->protocol_max);
4590 return -1;
4591}
4592
4593#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Philipp Reisner13e60372011-02-08 09:54:40 +01004594static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595{
4596	conn_err(tconn, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4597	conn_err(tconn, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004598 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004599}
4600#else
4601#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004602
4603/* Return value:
4604 1 - auth succeeded,
4605 0 - failed, try again (network error),
4606 -1 - auth failed, don't try again.
4607*/
4608
Philipp Reisner13e60372011-02-08 09:54:40 +01004609static int drbd_do_auth(struct drbd_tconn *tconn)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004610{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004611 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004612 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4613 struct scatterlist sg;
4614 char *response = NULL;
4615 char *right_response = NULL;
4616 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004617 unsigned int key_len;
4618	char secret[SHARED_SECRET_MAX]; /* 64 bytes */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004619 unsigned int resp_size;
4620 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004621 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004622 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004623 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004625 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4626
Philipp Reisner44ed1672011-04-19 17:10:19 +02004627 rcu_read_lock();
4628 nc = rcu_dereference(tconn->net_conf);
4629 key_len = strlen(nc->shared_secret);
4630 memcpy(secret, nc->shared_secret, key_len);
4631 rcu_read_unlock();
4632
Philipp Reisner13e60372011-02-08 09:54:40 +01004633 desc.tfm = tconn->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634 desc.flags = 0;
4635
Philipp Reisner44ed1672011-04-19 17:10:19 +02004636 rv = crypto_hash_setkey(tconn->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004638 conn_err(tconn, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004639 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640 goto fail;
4641 }
4642
4643 get_random_bytes(my_challenge, CHALLENGE_LEN);
4644
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004645 sock = &tconn->data;
4646 if (!conn_prepare_command(tconn, sock)) {
4647 rv = 0;
4648 goto fail;
4649 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004650 rv = !conn_send_command(tconn, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004651 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004652 if (!rv)
4653 goto fail;
4654
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004655 err = drbd_recv_header(tconn, &pi);
4656 if (err) {
4657 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004658 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004659 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004660
Philipp Reisner77351055b2011-02-07 17:24:26 +01004661 if (pi.cmd != P_AUTH_CHALLENGE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004662 conn_err(tconn, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004663 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 rv = 0;
4665 goto fail;
4666 }
4667
Philipp Reisner77351055b2011-02-07 17:24:26 +01004668 if (pi.size > CHALLENGE_LEN * 2) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004669		conn_err(tconn, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004670 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004671 goto fail;
4672 }
4673
Philipp Reisner77351055b2011-02-07 17:24:26 +01004674 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004675 if (peers_ch == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004676 conn_err(tconn, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004677 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004678 goto fail;
4679 }
4680
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004681 err = drbd_recv_all_warn(tconn, peers_ch, pi.size);
4682 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004683 rv = 0;
4684 goto fail;
4685 }
4686
Philipp Reisner13e60372011-02-08 09:54:40 +01004687 resp_size = crypto_hash_digestsize(tconn->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688 response = kmalloc(resp_size, GFP_NOIO);
4689 if (response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004690 conn_err(tconn, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004691 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692 goto fail;
4693 }
4694
4695 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004696 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004697
4698 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4699 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004700 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004701 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702 goto fail;
4703 }
4704
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004705 if (!conn_prepare_command(tconn, sock)) {
4706 rv = 0;
4707 goto fail;
4708 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004709 rv = !conn_send_command(tconn, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004710 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004711 if (!rv)
4712 goto fail;
4713
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004714 err = drbd_recv_header(tconn, &pi);
4715 if (err) {
4716 rv = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717 goto fail;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004718 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719
Philipp Reisner77351055b2011-02-07 17:24:26 +01004720 if (pi.cmd != P_AUTH_RESPONSE) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004721 conn_err(tconn, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004722 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004723 rv = 0;
4724 goto fail;
4725 }
4726
Philipp Reisner77351055b2011-02-07 17:24:26 +01004727 if (pi.size != resp_size) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004728		conn_err(tconn, "AuthResponse payload has wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004729 rv = 0;
4730 goto fail;
4731 }
4732
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004733	err = drbd_recv_all_warn(tconn, response, resp_size);
4734 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735 rv = 0;
4736 goto fail;
4737 }
4738
4739 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004740 if (right_response == NULL) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004741 conn_err(tconn, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004742 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004743 goto fail;
4744 }
4745
4746 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4747
4748 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4749 if (rv) {
Philipp Reisner13e60372011-02-08 09:54:40 +01004750 conn_err(tconn, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004751 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004752 goto fail;
4753 }
4754
4755 rv = !memcmp(response, right_response, resp_size);
4756
4757 if (rv)
Philipp Reisner44ed1672011-04-19 17:10:19 +02004758		conn_info(tconn, "Peer authenticated using %d bytes of HMAC\n",
4759 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004760 else
4761 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004762
4763 fail:
4764 kfree(peers_ch);
4765 kfree(response);
4766 kfree(right_response);
4767
4768 return rv;
4769}
4770#endif
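/*
 * The challenge/response scheme implemented by drbd_do_auth() above,
 * summarized from the code (both peers run the same steps):
 *
 *   1. send P_AUTH_CHALLENGE carrying CHALLENGE_LEN random bytes;
 *   2. receive the peer's P_AUTH_CHALLENGE (payload limited to
 *      2 * CHALLENGE_LEN bytes);
 *   3. send P_AUTH_RESPONSE = HMAC(shared_secret, peer's challenge),
 *      computed with cram_hmac_tfm keyed from net_conf->shared_secret;
 *   4. receive the peer's P_AUTH_RESPONSE and compare it against
 *      HMAC(shared_secret, our own challenge).
 *
 * A matching response returns 1, a mismatch returns -1 (do not retry),
 * and most transport or framing errors return 0 (retry the connection).
 */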
4771
4772int drbdd_init(struct drbd_thread *thi)
4773{
Philipp Reisner392c8802011-02-09 10:33:31 +01004774 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004775 int h;
4776
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004777 conn_info(tconn, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004778
4779 do {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004780 h = conn_connect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004781 if (h == 0) {
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004782 conn_disconnect(tconn);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004783 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004784 }
4785 if (h == -1) {
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004786 conn_warn(tconn, "Discarding network configuration.\n");
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004787 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004788 }
4789 } while (h == 0);
4790
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004791 if (h > 0)
4792 drbdd(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004793
Philipp Reisner81fa2e62011-05-04 15:10:30 +02004794 conn_disconnect(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004795
Philipp Reisner4d641dd2011-02-08 15:40:24 +01004796 conn_info(tconn, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004797 return 0;
4798}
4799
4800/* ********* acknowledge sender ******** */
4801
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004802static int got_conn_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004803{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004804 struct p_req_state_reply *p = pi->data;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004805 int retcode = be32_to_cpu(p->retcode);
4806
4807 if (retcode >= SS_SUCCESS) {
4808 set_bit(CONN_WD_ST_CHG_OKAY, &tconn->flags);
4809 } else {
4810 set_bit(CONN_WD_ST_CHG_FAIL, &tconn->flags);
4811 conn_err(tconn, "Requested state change failed by peer: %s (%d)\n",
4812 drbd_set_st_err_str(retcode), retcode);
4813 }
4814 wake_up(&tconn->ping_wait);
4815
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004816 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004817}
4818
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004819static int got_RqSReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004820{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004821 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004822 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004823 int retcode = be32_to_cpu(p->retcode);
4824
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004825 mdev = vnr_to_mdev(tconn, pi->vnr);
4826 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004827 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004828
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004829 if (test_bit(CONN_WD_ST_CHG_REQ, &tconn->flags)) {
4830 D_ASSERT(tconn->agreed_pro_version < 100);
4831 return got_conn_RqSReply(tconn, pi);
4832 }
4833
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004834 if (retcode >= SS_SUCCESS) {
4835 set_bit(CL_ST_CHG_SUCCESS, &mdev->flags);
4836 } else {
4837 set_bit(CL_ST_CHG_FAIL, &mdev->flags);
4838 dev_err(DEV, "Requested state change failed by peer: %s (%d)\n",
4839 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004840 }
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004841 wake_up(&mdev->state_wait);
4842
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004843 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004844}
4845
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004846static int got_Ping(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004848 return drbd_send_ping_ack(tconn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004849
4850}
4851
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004852static int got_PingAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004853{
4854 /* restore idle timeout */
Philipp Reisner2a67d8b2011-02-09 14:10:32 +01004855 tconn->meta.socket->sk->sk_rcvtimeo = tconn->net_conf->ping_int*HZ;
4856 if (!test_and_set_bit(GOT_PING_ACK, &tconn->flags))
4857 wake_up(&tconn->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004858
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004859 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004860}
4861
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004862static int got_IsInSync(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004863{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004864 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004865 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004866 sector_t sector = be64_to_cpu(p->sector);
4867 int blksize = be32_to_cpu(p->blksize);
4868
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004869 mdev = vnr_to_mdev(tconn, pi->vnr);
4870 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004871 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004872
Philipp Reisner31890f42011-01-19 14:12:51 +01004873 D_ASSERT(mdev->tconn->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004874
4875 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4876
Lars Ellenberg1d53f092010-09-05 01:13:24 +02004877 if (get_ldev(mdev)) {
4878 drbd_rs_complete_io(mdev, sector);
4879 drbd_set_in_sync(mdev, sector, blksize);
4880 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
4881 mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
4882 put_ldev(mdev);
4883 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004884 dec_rs_pending(mdev);
Philipp Reisner778f2712010-07-06 11:14:00 +02004885 atomic_add(blksize >> 9, &mdev->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004886
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004887 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004888}
4889
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004890static int
4891validate_req_change_req_state(struct drbd_conf *mdev, u64 id, sector_t sector,
4892 struct rb_root *root, const char *func,
4893 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004894{
4895 struct drbd_request *req;
4896 struct bio_and_error m;
4897
Philipp Reisner87eeee42011-01-19 14:16:30 +01004898 spin_lock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01004899 req = find_request(mdev, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004900 if (unlikely(!req)) {
Philipp Reisner87eeee42011-01-19 14:16:30 +01004901 spin_unlock_irq(&mdev->tconn->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004902 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004903 }
4904 __req_mod(req, what, &m);
Philipp Reisner87eeee42011-01-19 14:16:30 +01004905 spin_unlock_irq(&mdev->tconn->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004906
4907 if (m.bio)
4908 complete_master_bio(mdev, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004909 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004910}
4911
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004912static int got_BlockAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004914 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004915 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004916 sector_t sector = be64_to_cpu(p->sector);
4917 int blksize = be32_to_cpu(p->blksize);
4918 enum drbd_req_event what;
4919
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004920 mdev = vnr_to_mdev(tconn, pi->vnr);
4921 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004922 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004923
Philipp Reisnerb411b362009-09-25 16:07:19 -07004924 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4925
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004926 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004927 drbd_set_in_sync(mdev, sector, blksize);
4928 dec_rs_pending(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004929 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004930 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01004931 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004932 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004933 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004934 break;
4935 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004936 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004937 break;
4938 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004939 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004940 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004941 case P_DISCARD_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004942 what = DISCARD_WRITE;
4943 break;
4944 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01004945 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004946 break;
4947 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004948 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07004949 }
4950
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004951 return validate_req_change_req_state(mdev, p->block_id, sector,
4952 &mdev->write_requests, __func__,
4953 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004954}
4955
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004956static int got_NegAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004957{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004958 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004959 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004960 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004961 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004962 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004963
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004964 mdev = vnr_to_mdev(tconn, pi->vnr);
4965 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004966 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004967
Philipp Reisnerb411b362009-09-25 16:07:19 -07004968 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
4969
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01004970 if (p->block_id == ID_SYNCER) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004971 dec_rs_pending(mdev);
4972 drbd_rs_failed_io(mdev, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004973 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004974 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01004975
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004976 err = validate_req_change_req_state(mdev, p->block_id, sector,
4977 &mdev->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07004978 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02004979 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004980 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
4981 The master bio might already be completed, therefore the
4982 request is no longer in the collision hash. */
4983 /* In Protocol B we might already have got a P_RECV_ACK
4984 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01004985 drbd_set_out_of_sync(mdev, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01004986 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004987 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004988}
4989
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004990static int got_NegDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004991{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004992 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004993 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004994 sector_t sector = be64_to_cpu(p->sector);
4995
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004996 mdev = vnr_to_mdev(tconn, pi->vnr);
4997 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004998 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004999
Philipp Reisnerb411b362009-09-25 16:07:19 -07005000 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005001
Philipp Reisner380207d2011-11-11 12:31:20 +01005002 dev_err(DEV, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005003 (unsigned long long)sector, be32_to_cpu(p->blksize));
5004
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005005 return validate_req_change_req_state(mdev, p->block_id, sector,
5006 &mdev->read_requests, __func__,
5007 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005008}
5009
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005010static int got_NegRSDReply(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005011{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005012 struct drbd_conf *mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005013 sector_t sector;
5014 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005015 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005016
5017 mdev = vnr_to_mdev(tconn, pi->vnr);
5018 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005019 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005020
5021 sector = be64_to_cpu(p->sector);
5022 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005023
5024 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5025
5026 dec_rs_pending(mdev);
5027
5028 if (get_ldev_if_state(mdev, D_FAILED)) {
5029 drbd_rs_complete_io(mdev, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005030 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005031 case P_NEG_RS_DREPLY:
5032			drbd_rs_failed_io(mdev, sector, size); /* fall through */
5033 case P_RS_CANCEL:
5034 break;
5035 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005036 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005037 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005038 put_ldev(mdev);
5039 }
5040
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005041 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005042}
5043
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005044static int got_BarrierAck(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005045{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005046 struct p_barrier_ack *p = pi->data;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005047 struct drbd_conf *mdev;
5048 int vnr;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005049
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005050 tl_release(tconn, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005051
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005052 rcu_read_lock();
5053 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5054 if (mdev->state.conn == C_AHEAD &&
5055 atomic_read(&mdev->ap_in_flight) == 0 &&
5056 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &mdev->flags)) {
5057 mdev->start_resync_timer.expires = jiffies + HZ;
5058 add_timer(&mdev->start_resync_timer);
5059 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005060 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005061 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005062
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005063 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005064}
5065
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005066static int got_OVResult(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005067{
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005068 struct drbd_conf *mdev;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005069 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005070 struct drbd_work *w;
5071 sector_t sector;
5072 int size;
5073
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005074 mdev = vnr_to_mdev(tconn, pi->vnr);
5075 if (!mdev)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005076 return -EIO;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005077
Philipp Reisnerb411b362009-09-25 16:07:19 -07005078 sector = be64_to_cpu(p->sector);
5079 size = be32_to_cpu(p->blksize);
5080
5081 update_peer_seq(mdev, be32_to_cpu(p->seq_num));
5082
5083 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005084 drbd_ov_out_of_sync_found(mdev, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005085 else
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005086 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005087
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005088 if (!get_ldev(mdev))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005089 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005090
Philipp Reisnerb411b362009-09-25 16:07:19 -07005091 drbd_rs_complete_io(mdev, sector);
5092 dec_rs_pending(mdev);
5093
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005094 --mdev->ov_left;
5095
5096 /* let's advance progress step marks only for every other megabyte */
5097 if ((mdev->ov_left & 0x200) == 0x200)
5098 drbd_advance_rs_marks(mdev, mdev->ov_left);
5099
5100 if (mdev->ov_left == 0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005101 w = kmalloc(sizeof(*w), GFP_NOIO);
5102 if (w) {
5103 w->cb = w_ov_finished;
Philipp Reisnera21e9292011-02-08 15:08:49 +01005104 w->mdev = mdev;
Philipp Reisnere42325a2011-01-19 13:55:45 +01005105 drbd_queue_work_front(&mdev->tconn->data.work, w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005106 } else {
5107 dev_err(DEV, "kmalloc(w) failed.");
Andreas Gruenbacher8f7bed72010-12-19 23:53:14 +01005108 ov_out_of_sync_print(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005109 drbd_resync_finished(mdev);
5110 }
5111 }
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005112 put_ldev(mdev);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005113 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005114}
5115
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005116static int got_skip(struct drbd_tconn *tconn, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005117{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005118 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005119}
5120
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005121static int tconn_finish_peer_reqs(struct drbd_tconn *tconn)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005122{
Philipp Reisner082a3432011-03-15 16:05:42 +01005123 struct drbd_conf *mdev;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005124 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005125
5126 do {
5127 clear_bit(SIGNAL_ASENDER, &tconn->flags);
5128 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005129
5130 rcu_read_lock();
5131 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
5132 kref_get(&mdev->kref);
5133 rcu_read_unlock();
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005134 if (drbd_finish_peer_reqs(mdev)) {
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005135 kref_put(&mdev->kref, &drbd_minor_destroy);
5136 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005137 }
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005138 kref_put(&mdev->kref, &drbd_minor_destroy);
5139 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005140 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005141 set_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005142
5143 spin_lock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005144 idr_for_each_entry(&tconn->volumes, mdev, vnr) {
Philipp Reisner082a3432011-03-15 16:05:42 +01005145 not_empty = !list_empty(&mdev->done_ee);
5146 if (not_empty)
5147 break;
5148 }
5149 spin_unlock_irq(&tconn->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005150 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005151 } while (not_empty);
5152
5153 return 0;
5154}
5155
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005156struct asender_cmd {
5157 size_t pkt_size;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005158 int (*fn)(struct drbd_tconn *tconn, struct packet_info *);
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005159};
5160
5161static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005162 [P_PING] = { 0, got_Ping },
5163 [P_PING_ACK] = { 0, got_PingAck },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005164 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5165 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5166 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5167 [P_DISCARD_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
5168 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5169 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
5170 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
5171 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5172 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5173 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5174 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
5175 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
5176 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5177	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
5178 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005179};
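/*
 * The table above is indexed by packet command: pkt_size is the payload
 * size expected after the protocol header, and fn is the handler invoked
 * from drbd_asender() below.  A handler returning non-zero makes the
 * asender drop into C_NETWORK_FAILURE and reconnect; a command without a
 * table entry forces C_DISCONNECTING.
 */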
5180
Philipp Reisnerb411b362009-09-25 16:07:19 -07005181int drbd_asender(struct drbd_thread *thi)
5182{
Philipp Reisner392c8802011-02-09 10:33:31 +01005183 struct drbd_tconn *tconn = thi->tconn;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005184 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005185 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005186 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005187 void *buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005188 int received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005189 unsigned int header_size = drbd_header_size(tconn);
5190 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005191 bool ping_timeout_active = false;
5192 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005193 int ping_timeo, tcp_cork, ping_int;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005194
Philipp Reisnerb411b362009-09-25 16:07:19 -07005195 current->policy = SCHED_RR; /* Make this a realtime task! */
5196 current->rt_priority = 2; /* more important than all other tasks */
5197
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005198 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005199 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005200
5201 rcu_read_lock();
5202 nc = rcu_dereference(tconn->net_conf);
5203 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005204 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005205 ping_int = nc->ping_int;
5206 rcu_read_unlock();
5207
Philipp Reisner32862ec2011-02-08 16:41:01 +01005208 if (test_and_clear_bit(SEND_PING, &tconn->flags)) {
Andreas Gruenbachera17647a2011-04-01 12:49:42 +02005209 if (drbd_send_ping(tconn)) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005210 conn_err(tconn, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005211 goto reconnect;
5212 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02005213 tconn->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
5214 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005215 }
5216
Philipp Reisner32862ec2011-02-08 16:41:01 +01005217 /* TODO: conditionally cork; it may hurt latency if we cork without
5218 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005219 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005220 drbd_tcp_cork(tconn->meta.socket);
Andreas Gruenbachera990be42011-04-06 17:56:48 +02005221 if (tconn_finish_peer_reqs(tconn)) {
5222 conn_err(tconn, "tconn_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005223 goto reconnect;
Philipp Reisner082a3432011-03-15 16:05:42 +01005224 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005225 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005226 if (tcp_cork)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005227 drbd_tcp_uncork(tconn->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005228
5229 /* short circuit, recv_msg would return EINTR anyways. */
5230 if (signal_pending(current))
5231 continue;
5232
Philipp Reisner32862ec2011-02-08 16:41:01 +01005233 rv = drbd_recv_short(tconn->meta.socket, buf, expect-received, 0);
5234 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005235
5236 flush_signals(current);
5237
5238 /* Note:
5239 * -EINTR (on meta) we got a signal
5240 * -EAGAIN (on meta) rcvtimeo expired
5241 * -ECONNRESET other side closed the connection
5242 * -ERESTARTSYS (on data) we got a signal
5243 * rv < 0 other than above: unexpected error!
5244 * rv == expected: full header or command
5245 * rv < expected: "woken" by signal during receive
5246 * rv == 0 : "connection shut down by peer"
5247 */
5248 if (likely(rv > 0)) {
5249 received += rv;
5250 buf += rv;
5251 } else if (rv == 0) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005252 conn_err(tconn, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005253 goto reconnect;
5254 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005255 /* If the data socket received something meanwhile,
5256 * that is good enough: peer is still alive. */
Philipp Reisner32862ec2011-02-08 16:41:01 +01005257 if (time_after(tconn->last_received,
5258 jiffies - tconn->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005259 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005260 if (ping_timeout_active) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005261 conn_err(tconn, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005262 goto reconnect;
5263 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005264 set_bit(SEND_PING, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005265 continue;
5266 } else if (rv == -EINTR) {
5267 continue;
5268 } else {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005269 conn_err(tconn, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005270 goto reconnect;
5271 }
5272
5273 if (received == expect && cmd == NULL) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005274 if (decode_header(tconn, tconn->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005275 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005276 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005277 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005278 conn_err(tconn, "Unexpected meta packet %s (0x%04x)\n",
5279 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005280 goto disconnect;
5281 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005282 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005283 if (pi.size != expect - header_size) {
Philipp Reisner32862ec2011-02-08 16:41:01 +01005284 conn_err(tconn, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005285 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005286 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005287 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005288 }
5289 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005290 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005291
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005292 err = cmd->fn(tconn, &pi);
5293 if (err) {
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005294 conn_err(tconn, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005295 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005296 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005297
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005298 tconn->last_received = jiffies;
5299
Philipp Reisner44ed1672011-04-19 17:10:19 +02005300 if (cmd == &asender_tbl[P_PING_ACK]) {
5301 /* restore idle timeout */
5302 tconn->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
5303 ping_timeout_active = false;
5304 }
Lars Ellenbergf36af182011-03-09 22:44:55 +01005305
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005306 buf = tconn->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005307 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005308 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005309 cmd = NULL;
5310 }
5311 }
5312
5313 if (0) {
5314reconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005315 conn_request_state(tconn, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005316 }
5317 if (0) {
5318disconnect:
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005319 conn_request_state(tconn, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005320 }
Philipp Reisner32862ec2011-02-08 16:41:01 +01005321 clear_bit(SIGNAL_ASENDER, &tconn->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005322
Philipp Reisner32862ec2011-02-08 16:41:01 +01005323 conn_info(tconn, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005324
5325 return 0;
5326}