/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

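/*
 * Illustrative sketch (not driver logic): a chain of three pages hangs
 * off a head pointer like
 *
 *	head -> page A -> page B -> page C -> 0
 *
 * where each arrow is the page's ->private field, written with
 * set_page_private() and followed with page_chain_next(); a ->private
 * of 0 marks the end of the chain.
 */
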
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

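/*
 * Illustrative pattern (a sketch, mirroring drbd_free_pages() below;
 * "chain" is a made-up variable): return a private chain to the global
 * pool by finding its tail outside the lock, then splicing it in under
 * drbd_pp_lock:
 *
 *	int n;
 *	struct page *tail = page_chain_tail(chain, &n);
 *	spin_lock(&drbd_pp_lock);
 *	page_chain_add(&drbd_pp_pool, chain, tail);
 *	drbd_pp_vacant += n;
 *	spin_unlock(&drbd_pp_lock);
 */
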
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}

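/*
 * Illustrative pairing (a sketch; "nr_pages" is a made-up variable):
 * pages taken here are accounted in device->pp_in_use, so every chain
 * must eventually be returned through drbd_free_pages() below:
 *
 *	struct page *page = drbd_alloc_pages(peer_device, nr_pages, true);
 *	if (page) {
 *		... use the chain ...
 *		drbd_free_pages(peer_device->device, page, 0);
 *	}
 */
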
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

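/* Thin wrapper around kernel_recvmsg(): with flags == 0 it defaults to
 * MSG_WAITALL | MSG_NOSIGNAL, i.e. it blocks until all "size" bytes have
 * arrived (or the connection broke) and returns the byte count received,
 * or a negative errno. */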
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

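/*
 * Illustrative use (a sketch; the 128k values are made up): bump both
 * buffers on a fresh socket, before connect() or listen(), as the tcp(7)
 * quote above requires:
 *
 *	drbd_setbufsize(sock, 128 << 10, 128 << 10);
 */
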
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200564static struct socket *drbd_try_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700565{
566 const char *what;
567 struct socket *sock;
568 struct sockaddr_in6 src_in6;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200569 struct sockaddr_in6 peer_in6;
570 struct net_conf *nc;
571 int err, peer_addr_len, my_addr_len;
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200572 int sndbuf_size, rcvbuf_size, connect_int;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700573 int disconnect_on_error = 1;
574
Philipp Reisner44ed1672011-04-19 17:10:19 +0200575 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200576 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200577 if (!nc) {
578 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700579 return NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200580 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200581 sndbuf_size = nc->sndbuf_size;
582 rcvbuf_size = nc->rcvbuf_size;
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200583 connect_int = nc->connect_int;
Andreas Gruenbacher089c0752011-06-14 18:28:09 +0200584 rcu_read_unlock();
Philipp Reisner44ed1672011-04-19 17:10:19 +0200585
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200586 my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
587 memcpy(&src_in6, &connection->my_addr, my_addr_len);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200588
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200589 if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
Philipp Reisner44ed1672011-04-19 17:10:19 +0200590 src_in6.sin6_port = 0;
591 else
592 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
593
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200594 peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
595 memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700596
597 what = "sock_create_kern";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200598 err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
599 SOCK_STREAM, IPPROTO_TCP, &sock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700600 if (err < 0) {
601 sock = NULL;
602 goto out;
603 }
604
605 sock->sk->sk_rcvtimeo =
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200606 sock->sk->sk_sndtimeo = connect_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200607 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700608
609 /* explicitly bind to the configured IP as source IP
610 * for the outgoing connections.
611 * This is needed for multihomed hosts and to be
612 * able to use lo: interfaces for drbd.
613 * Make sure to use 0 as port number, so linux selects
614 * a free one dynamically.
615 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700616 what = "bind before connect";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200617 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700618 if (err < 0)
619 goto out;
620
621 /* connect may fail, peer not yet available.
622 * stay C_WF_CONNECTION, don't go Disconnecting! */
623 disconnect_on_error = 0;
624 what = "connect";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200625 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700626
627out:
628 if (err < 0) {
629 if (sock) {
630 sock_release(sock);
631 sock = NULL;
632 }
633 switch (-err) {
634 /* timeout, busy, signal pending */
635 case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
636 case EINTR: case ERESTARTSYS:
637 /* peer not (yet) available, network problem */
638 case ECONNREFUSED: case ENETUNREACH:
639 case EHOSTDOWN: case EHOSTUNREACH:
640 disconnect_on_error = 0;
641 break;
642 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200643 drbd_err(connection, "%s failed, err = %d\n", what, err);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700644 }
645 if (disconnect_on_error)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200646 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700647 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200648
Philipp Reisnerb411b362009-09-25 16:07:19 -0700649 return sock;
650}
651
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200652struct accept_wait_data {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200653 struct drbd_connection *connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200654 struct socket *s_listen;
655 struct completion door_bell;
656 void (*original_sk_state_change)(struct sock *sk);
657
658};
659
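/* The accept path does not poll. prepare_listen_socket() below installs
 * drbd_incoming_connection() as the listen socket's sk_state_change
 * callback; once a connection reaches TCP_ESTABLISHED it completes
 * ad->door_bell, which drbd_wait_for_connect() sleeps on. The hook must
 * chain to the original callback it replaced. */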
static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
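
/* drbd_socket_okay() above probes with a non-blocking MSG_PEEK read:
 * pending data or -EAGAIN means the connection is still usable; a return
 * of 0 (orderly shutdown) or any other error releases the socket and
 * clears the pointer. Typical use, as in conn_connect() below:
 *
 *	ok = drbd_socket_okay(&sock.socket);
 *	ok = drbd_socket_okay(&msock.socket) && ok;
 */
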
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

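/* Note: drbd_connected() above is the per-volume tail of connection
 * setup. After the feature and (optional) auth handshake it resets the
 * sequence counters and sends sync parameters, sizes, uuids and the
 * current state to the peer, in that order. */
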
857/*
858 * return values:
859 * 1 yes, we have a valid connection
860 * 0 oops, did not work out, please try again
861 * -1 peer talks different language,
862 * no point in trying again, please go standalone.
863 * -2 We do not have a network config...
864 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200865static int conn_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700866{
Philipp Reisner7da35862011-12-19 22:42:56 +0100867 struct drbd_socket sock, msock;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +0200868 struct drbd_peer_device *peer_device;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200869 struct net_conf *nc;
Philipp Reisner92f14952012-08-01 11:41:01 +0200870 int vnr, timeout, h, ok;
Philipp Reisner08b165b2011-09-05 16:22:33 +0200871 bool discard_my_data;
Philipp Reisner197296f2012-03-26 16:47:11 +0200872 enum drbd_state_rv rv;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200873 struct accept_wait_data ad = {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200874 .connection = connection,
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200875 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
876 };
Philipp Reisnerb411b362009-09-25 16:07:19 -0700877
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200878 clear_bit(DISCONNECT_SENT, &connection->flags);
879 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700880 return -2;
881
Philipp Reisner7da35862011-12-19 22:42:56 +0100882 mutex_init(&sock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200883 sock.sbuf = connection->data.sbuf;
884 sock.rbuf = connection->data.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100885 sock.socket = NULL;
886 mutex_init(&msock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200887 msock.sbuf = connection->meta.sbuf;
888 msock.rbuf = connection->meta.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100889 msock.socket = NULL;
890
Andreas Gruenbacher0916e0e2011-03-21 14:10:15 +0100891 /* Assume that the peer only understands protocol 80 until we know better. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200892 connection->agreed_pro_version = 80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200894 if (prepare_listen_socket(connection, &ad))
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200895 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700896
897 do {
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200898 struct socket *s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700899
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200900 s = drbd_try_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700901 if (s) {
Philipp Reisner7da35862011-12-19 22:42:56 +0100902 if (!sock.socket) {
903 sock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200904 send_first_packet(connection, &sock, P_INITIAL_DATA);
Philipp Reisner7da35862011-12-19 22:42:56 +0100905 } else if (!msock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200906 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100907 msock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200908 send_first_packet(connection, &msock, P_INITIAL_META);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700909 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200910 drbd_err(connection, "Logic error in conn_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700911 goto out_release_sockets;
912 }
913 }
914
Philipp Reisner7da35862011-12-19 22:42:56 +0100915 if (sock.socket && msock.socket) {
916 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200917 nc = rcu_dereference(connection->net_conf);
Philipp Reisner7da35862011-12-19 22:42:56 +0100918 timeout = nc->ping_timeo * HZ / 10;
919 rcu_read_unlock();
920 schedule_timeout_interruptible(timeout);
921 ok = drbd_socket_okay(&sock.socket);
922 ok = drbd_socket_okay(&msock.socket) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700923 if (ok)
924 break;
925 }
926
927retry:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200928 s = drbd_wait_for_connect(connection, &ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700929 if (s) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200930 int fp = receive_first_packet(connection, s);
Philipp Reisner7da35862011-12-19 22:42:56 +0100931 drbd_socket_okay(&sock.socket);
932 drbd_socket_okay(&msock.socket);
Philipp Reisner92f14952012-08-01 11:41:01 +0200933 switch (fp) {
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200934 case P_INITIAL_DATA:
Philipp Reisner7da35862011-12-19 22:42:56 +0100935 if (sock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200936 drbd_warn(connection, "initial packet S crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100937 sock_release(sock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200938 sock.socket = s;
939 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700940 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100941 sock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700942 break;
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200943 case P_INITIAL_META:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200944 set_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100945 if (msock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200946 drbd_warn(connection, "initial packet M crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100947 sock_release(msock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200948 msock.socket = s;
949 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700950 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100951 msock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700952 break;
953 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200954 drbd_warn(connection, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700955 sock_release(s);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200956randomize:
Akinobu Mita38b682b22013-04-29 16:21:31 -0700957 if (prandom_u32() & 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700958 goto retry;
959 }
960 }
961
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200962 if (connection->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700963 goto out_release_sockets;
964 if (signal_pending(current)) {
965 flush_signals(current);
966 smp_rmb();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200967 if (get_t_state(&connection->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700968 goto out_release_sockets;
969 }
970
Philipp Reisnerb666dbf2012-07-26 14:12:59 +0200971 ok = drbd_socket_okay(&sock.socket);
972 ok = drbd_socket_okay(&msock.socket) && ok;
973 } while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700974
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200975 if (ad.s_listen)
976 sock_release(ad.s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700977
Philipp Reisner98683652012-11-09 14:18:43 +0100978 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
979 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700980
Philipp Reisner7da35862011-12-19 22:42:56 +0100981 sock.socket->sk->sk_allocation = GFP_NOIO;
982 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700983
Philipp Reisner7da35862011-12-19 22:42:56 +0100984 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
985 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987 /* NOT YET ...
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200988 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
Philipp Reisner7da35862011-12-19 22:42:56 +0100989 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Andreas Gruenbacher60381782011-03-28 17:05:50 +0200990 * first set it to the P_CONNECTION_FEATURES timeout,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700991 * which we set to 4x the configured ping_timeout. */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200992 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200993 nc = rcu_dereference(connection->net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700994
Philipp Reisner7da35862011-12-19 22:42:56 +0100995 sock.socket->sk->sk_sndtimeo =
996 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200997
Philipp Reisner7da35862011-12-19 22:42:56 +0100998 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200999 timeout = nc->timeout * HZ / 10;
Philipp Reisner08b165b2011-09-05 16:22:33 +02001000 discard_my_data = nc->discard_my_data;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001001 rcu_read_unlock();
1002
Philipp Reisner7da35862011-12-19 22:42:56 +01001003 msock.socket->sk->sk_sndtimeo = timeout;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001004
1005 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001006 * we use TCP_CORK where appropriate, though */
Philipp Reisner7da35862011-12-19 22:42:56 +01001007 drbd_tcp_nodelay(sock.socket);
1008 drbd_tcp_nodelay(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001009
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001010 connection->data.socket = sock.socket;
1011 connection->meta.socket = msock.socket;
1012 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001013
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001014 h = drbd_do_features(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001015 if (h <= 0)
1016 return h;
1017
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001018 if (connection->cram_hmac_tfm) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001019 /* drbd_request_state(device, NS(conn, WFAuth)); */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001020 switch (drbd_do_auth(connection)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +01001021 case -1:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001022 drbd_err(connection, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001023 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +01001024 case 0:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001025 drbd_err(connection, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01001026 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001027 }
1028 }
1029
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001030 connection->data.socket->sk->sk_sndtimeo = timeout;
1031 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001032
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001033 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +02001034 return -1;
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001035
Philipp Reisner31007742014-04-28 18:43:12 +02001036 /* Prevent a race between resync-handshake and
1037 * being promoted to Primary.
1038 *
1039 * Grab and release the state mutex, so we know that any current
1040 * drbd_set_role() is finished, and any incoming drbd_set_role
1041 * will see the STATE_SENT flag, and wait for it to be cleared.
1042 */
1043 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1044 mutex_lock(peer_device->device->state_mutex);
1045
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001046 set_bit(STATE_SENT, &connection->flags);
Philipp Reisner197296f2012-03-26 16:47:11 +02001047
Philipp Reisner31007742014-04-28 18:43:12 +02001048 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1049 mutex_unlock(peer_device->device->state_mutex);
1050
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001051 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001052 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1053 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001054 kref_get(&device->kref);
Andreas Gruenbacher26ea8f92013-06-25 16:50:03 +02001055 rcu_read_unlock();
1056
Philipp Reisner08b165b2011-09-05 16:22:33 +02001057 if (discard_my_data)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001058 set_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001059 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001060 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001061
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001062 drbd_connected(peer_device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001063 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001064 rcu_read_lock();
1065 }
1066 rcu_read_unlock();
1067
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001068 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1069 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1070 clear_bit(STATE_SENT, &connection->flags);
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001071 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001072 }
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001073
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001074 drbd_thread_start(&connection->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001075
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001076 mutex_lock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001077 /* The discard_my_data flag is a single-shot modifier to the next
1078 * connection attempt, the handshake of which is now well underway.
1079 * No need for rcu style copying of the whole struct
1080 * just to clear a single value. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001081 connection->net_conf->discard_my_data = 0;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001082 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001083
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001084 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001085
1086out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001087 if (ad.s_listen)
1088 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001089 if (sock.socket)
1090 sock_release(sock.socket);
1091 if (msock.socket)
1092 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001093 return -1;
1094}
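/* For orientation, an illustrative (non-compiled) summary of the socket
 * timeouts configured in the connect path above; all values come from
 * net_conf, read under rcu. */
#if 0
	/* during the feature/auth handshake, both sockets use
	 *     sk_sndtimeo = sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
	 * afterwards:
	 *   sock  (data): sk_sndtimeo = nc->timeout*HZ/10,
	 *                 sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT
	 *   msock (meta): sk_sndtimeo = nc->timeout*HZ/10,
	 *                 sk_rcvtimeo = nc->ping_int*HZ
	 * The data socket may block on receive indefinitely; peer
	 * liveness is detected by pings on the meta socket instead. */
#endif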
1095
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001096static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001098 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001099
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001100 if (header_size == sizeof(struct p_header100) &&
1101 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1102 struct p_header100 *h = header;
1103 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001104 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001105 return -EINVAL;
1106 }
1107 pi->vnr = be16_to_cpu(h->volume);
1108 pi->cmd = be16_to_cpu(h->command);
1109 pi->size = be32_to_cpu(h->length);
1110 } else if (header_size == sizeof(struct p_header95) &&
1111 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001112 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001114 pi->size = be32_to_cpu(h->length);
1115 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001116 } else if (header_size == sizeof(struct p_header80) &&
1117 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1118 struct p_header80 *h = header;
1119 pi->cmd = be16_to_cpu(h->command);
1120 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001121 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001122 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001123 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001124 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001125 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001126 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001128 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001129 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130}
1131
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001132static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001133{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001134 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001135 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001136
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001137 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001138 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001139 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001140
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001141 err = decode_header(connection, buffer, pi);
1142 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001144 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145}
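/* An illustrative (non-compiled) sketch of how a receive loop consumes
 * the decoded header: pi->vnr selects the volume, pi->cmd the handler,
 * and pi->size bytes of payload follow; pi->data points just past the
 * header in the receive buffer.  conn_peer_device() is defined further
 * below; the dispatch itself is hypothetical here. */
#if 0
	struct packet_info pi;

	while (drbd_recv_header(connection, &pi) == 0) {
		struct drbd_peer_device *peer_device =
			conn_peer_device(connection, pi.vnr);
		/* look up the handler for pi.cmd and let it receive
		 * the pi.size bytes of payload ... */
	}
#endif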
1146
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001147static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001148{
1149 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001150 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001151 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001152
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001153 if (connection->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001154 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001155 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1156 struct drbd_device *device = peer_device->device;
1157
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001158 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001159 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001160 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001161 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001162
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001163 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001164 GFP_NOIO, NULL);
1165 if (rv) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001166 drbd_info(device, "local disk flush failed with status %d\n", rv);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001167 /* would rather check on EOPNOTSUPP, but that is not reliable.
1168 * don't try again for ANY return value != 0
1169 * if (rv == -EOPNOTSUPP) */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001170 drbd_bump_write_ordering(connection, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001171 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001172 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001173 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001174
1175 rcu_read_lock();
1176 if (rv)
1177 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001178 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001179 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001180 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181}
1182
1183/**
1184 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001185 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001186 * @epoch: Epoch object.
1187 * @ev: Epoch event.
1188 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001189static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190 struct drbd_epoch *epoch,
1191 enum epoch_event ev)
1192{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001193 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 enum finish_epoch rv = FE_STILL_LIVE;
1196
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001197 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198 do {
1199 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001200
1201 epoch_size = atomic_read(&epoch->epoch_size);
1202
1203 switch (ev & ~EV_CLEANUP) {
1204 case EV_PUT:
1205 atomic_dec(&epoch->active);
1206 break;
1207 case EV_GOT_BARRIER_NR:
1208 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 break;
1210 case EV_BECAME_LAST:
1211 /* nothing to do */
1212 break;
1213 }
1214
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215 if (epoch_size != 0 &&
1216 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001217 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001219 spin_unlock(&connection->epoch_lock);
1220 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1221 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001223#if 0
1224 /* FIXME: dec unacked on connection, once we have
1225 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001226 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001227 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001228#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001230 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1232 list_del(&epoch->list);
1233 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001234 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001235 kfree(epoch);
1236
1237 if (rv == FE_STILL_LIVE)
1238 rv = FE_DESTROYED;
1239 } else {
1240 epoch->flags = 0;
1241 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001242 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243 if (rv == FE_STILL_LIVE)
1244 rv = FE_RECYCLED;
1245 }
1246 }
1247
1248 if (!next_epoch)
1249 break;
1250
1251 epoch = next_epoch;
1252 } while (1);
1253
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001254 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256 return rv;
1257}
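/* The finish condition tested above, restated as a standalone predicate
 * for readability.  An illustrative sketch only; epoch_may_finish() is
 * a hypothetical helper, not part of DRBD. */
#if 0
static bool epoch_may_finish(struct drbd_epoch *epoch, enum epoch_event ev)
{
	/* at least one request in the epoch, none of them still in
	 * flight, and either its P_BARRIER was seen or we clean up */
	return atomic_read(&epoch->epoch_size) != 0 &&
	       atomic_read(&epoch->active) == 0 &&
	       (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) ||
		(ev & EV_CLEANUP));
}
#endif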
1258
1259/**
1260 * drbd_bump_write_ordering() - Fall back to another write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001261 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 * @wo: Write ordering method to try.
1263 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001264void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001265{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001266 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001267 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001268 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001269 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270 static char *write_ordering_str[] = {
1271 [WO_none] = "none",
1272 [WO_drain_io] = "drain",
1273 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274 };
1275
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001276 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001277 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001278 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001279 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1280 struct drbd_device *device = peer_device->device;
1281
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001282 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001283 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001284 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001285
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001286 if (wo == WO_bdev_flush && !dc->disk_flushes)
1287 wo = WO_drain_io;
1288 if (wo == WO_drain_io && !dc->disk_drain)
1289 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001290 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001291 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001292 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001293 connection->write_ordering = wo;
1294 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001295 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001296}
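/* Because of the min() above, the write ordering method can only ever
 * be degraded, never upgraded, over the lifetime of the connection.
 * An illustrative sketch, relying on the enum values ascending in
 * strictness (WO_none < WO_drain_io < WO_bdev_flush), as the min()
 * and >= comparisons above and in drbd_flush() assume: */
#if 0
	/* currently WO_bdev_flush: */
	drbd_bump_write_ordering(connection, WO_drain_io);   /* flush -> drain */
	drbd_bump_write_ordering(connection, WO_bdev_flush); /* no-op, stays drain */
#endif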
1297
1298/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001299 * drbd_submit_peer_request() - submit the bios for a peer request
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001300 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001301 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001302 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001303 *
1304 * May spread the pages to multiple bios,
1305 * depending on bio_add_page restrictions.
1306 *
1307 * Returns 0 if all bios have been submitted,
1308 * -ENOMEM if we could not allocate enough bios,
1309 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1310 * single page to an empty bio (which should never happen and likely indicates
1311 * that the lower level IO stack is in some way broken). This has been observed
1312 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001313 */
1314/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001315int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001316 struct drbd_peer_request *peer_req,
1317 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001318{
1319 struct bio *bios = NULL;
1320 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001321 struct page *page = peer_req->pages;
1322 sector_t sector = peer_req->i.sector;
1323 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001324 unsigned n_bios = 0;
1325 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001326 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001327
1328 /* In most cases, we will only need one bio. But in case the lower
1329 * level restrictions happen to be different at this offset on this
1330 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001331 * request in more than one bio.
1332 *
1333 * Plain bio_alloc is good enough here, this is no DRBD internally
1334 * generated bio, but a bio allocated on behalf of the peer.
1335 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001336next_bio:
1337 bio = bio_alloc(GFP_NOIO, nr_pages);
1338 if (!bio) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001339 drbd_err(device, "submit_ee: Allocation of a bio failed\n");
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001340 goto fail;
1341 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001342 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001343 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001344 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001345 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001346 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001347 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001348
1349 bio->bi_next = bios;
1350 bios = bio;
1351 ++n_bios;
1352
1353 page_chain_for_each(page) {
1354 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1355 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001356 /* A single page must always be possible!
1357 * But in case it fails anyways,
1358 * we deal with it, and complain (below). */
1359 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001360 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001361 "bio_add_page failed for len=%u, "
1362 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001363 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001364 err = -ENOSPC;
1365 goto fail;
1366 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001367 goto next_bio;
1368 }
1369 ds -= len;
1370 sector += len >> 9;
1371 --nr_pages;
1372 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001373 D_ASSERT(device, page == NULL);
1374 D_ASSERT(device, ds == 0);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001375
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001376 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001377 do {
1378 bio = bios;
1379 bios = bios->bi_next;
1380 bio->bi_next = NULL;
1381
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001382 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001383 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001384 return 0;
1385
1386fail:
1387 while (bios) {
1388 bio = bios;
1389 bios = bios->bi_next;
1390 bio_put(bio);
1391 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001392 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001393}
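/* An illustrative caller pattern for drbd_submit_peer_request(), as
 * used by recv_resync_read() further below: on failure the caller must
 * unlink and free the peer request itself, then trigger a reconnect. */
#if 0
	if (drbd_submit_peer_request(device, peer_req, WRITE,
				     DRBD_FAULT_RS_WR) == 0)
		return 0;
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	drbd_free_peer_req(device, peer_req);
#endif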
1394
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001395static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001396 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001397{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001398 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001400 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001401 drbd_clear_interval(i);
1402
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001403 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001404 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001405 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001406}
1407
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001408static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001409{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001410 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001411 int vnr;
1412
1413 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001414 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1415 struct drbd_device *device = peer_device->device;
1416
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001417 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001418 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001419 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001420 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001421 rcu_read_lock();
1422 }
1423 rcu_read_unlock();
1424}
1425
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001426static struct drbd_peer_device *
1427conn_peer_device(struct drbd_connection *connection, int volume_number)
1428{
1429 return idr_find(&connection->peer_devices, volume_number);
1430}
1431
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001432static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001433{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001434 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001435 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001436 struct drbd_epoch *epoch;
1437
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001438 /* FIXME these are unacked on connection,
1439 * not a specific (peer)device.
1440 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001441 connection->current_epoch->barrier_nr = p->barrier;
1442 connection->current_epoch->connection = connection;
1443 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444
1445 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1446 * the activity log, which means it would not be resynced in case the
1447 * R_PRIMARY crashes now.
1448 * Therefore we must send the barrier_ack after the barrier request was
1449 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001450 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001451 case WO_none:
1452 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001453 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001454
1455 /* receiver context, in the writeout path of the other node.
1456 * avoid potential distributed deadlock */
1457 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1458 if (epoch)
1459 break;
1460 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001461 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001462 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463
1464 case WO_bdev_flush:
1465 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001466 conn_wait_active_ee_empty(connection);
1467 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001468
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001469 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001470 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1471 if (epoch)
1472 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 }
1474
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001475 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001476 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001477 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001478 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479 }
1480
1481 epoch->flags = 0;
1482 atomic_set(&epoch->epoch_size, 0);
1483 atomic_set(&epoch->active, 0);
1484
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001485 spin_lock(&connection->epoch_lock);
1486 if (atomic_read(&connection->current_epoch->epoch_size)) {
1487 list_add(&epoch->list, &connection->current_epoch->list);
1488 connection->current_epoch = epoch;
1489 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 } else {
1491 /* The current_epoch got recycled while we allocated this one... */
1492 kfree(epoch);
1493 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001494 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001496 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497}
1498
1499/* used from receive_RSDataReply (recv_resync_read)
1500 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001501static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001502read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001503 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001504{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001505 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001506 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001507 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001508 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001509 int dgs, ds, err;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001510 void *dig_in = peer_device->connection->int_dig_in;
1511 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001512 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001514 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001515 if (peer_device->connection->peer_integrity_tfm) {
1516 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001517 /*
1518 * FIXME: Receive the incoming digest into the receive buffer
1519 * here, together with its struct p_data?
1520 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001521 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001522 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001524 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525 }
1526
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001527 if (!expect(IS_ALIGNED(data_size, 512)))
1528 return NULL;
1529 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1530 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531
Lars Ellenberg66660322010-04-06 12:15:04 +02001532 /* even though we trust our peer,
1533 * we sometimes have to double check. */
1534 if (sector + (data_size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001535 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001536 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001537 (unsigned long long)capacity,
1538 (unsigned long long)sector, data_size);
1539 return NULL;
1540 }
1541
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1543 * "criss-cross" setup, that might cause write-out on some other DRBD,
1544 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001545 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001546 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001548
Lars Ellenberga73ff322012-06-25 19:15:38 +02001549 if (!data_size)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001550 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001551
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001553 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001554 page_chain_for_each(page) {
1555 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001556 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001557 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001558 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001559 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001560 data[0] = data[0] ^ (unsigned long)-1;
1561 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001563 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001564 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 return NULL;
1566 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001567 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568 }
1569
1570 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001571 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001572 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001573 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001574 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001575 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576 return NULL;
1577 }
1578 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001579 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001580 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581}
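/* For reference, an illustrative sketch of the wire layout consumed by
 * read_in_block() when a peer data integrity algorithm is configured;
 * digest and payload together make up the size announced in the packet
 * header:
 *
 *   [ digest: dgs bytes ][ payload: data_size - dgs bytes ]
 *
 * The digest is received into dig_in, the payload into the page chain,
 * then recomputed into dig_vv via drbd_csum_ee(); on mismatch the peer
 * request is freed and the receive fails. */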
1582
1583/* drbd_drain_block() just takes a data block
1584 * out of the socket input buffer, and discards it.
1585 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001586static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587{
1588 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001589 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001590 void *data;
1591
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001592 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001593 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001594
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001595 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596
1597 data = kmap(page);
1598 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001599 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1600
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001601 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001602 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001604 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605 }
1606 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001607 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001608 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609}
1610
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001611static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001612 sector_t sector, int data_size)
1613{
Kent Overstreet79886132013-11-23 17:19:00 -08001614 struct bio_vec bvec;
1615 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001616 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001617 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001618 void *dig_in = peer_device->connection->int_dig_in;
1619 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001621 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001622 if (peer_device->connection->peer_integrity_tfm) {
1623 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1624 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001625 if (err)
1626 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001627 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628 }
1629
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 /* optimistically update recv_cnt. if receiving fails below,
1631 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001632 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001633
1634 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001635 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636
Kent Overstreet79886132013-11-23 17:19:00 -08001637 bio_for_each_segment(bvec, bio, iter) {
1638 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1639 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001640 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001641 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001642 if (err)
1643 return err;
1644 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645 }
1646
1647 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001648 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001650 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001651 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652 }
1653 }
1654
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001655 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001656 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657}
1658
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001659/*
1660 * e_end_resync_block() is called in asender context via
1661 * drbd_finish_peer_reqs().
1662 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001663static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001665 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001666 container_of(w, struct drbd_peer_request, w);
1667 struct drbd_peer_device *peer_device = peer_req->peer_device;
1668 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001669 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001670 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001671
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001672 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001673
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001674 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001675 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001676 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677 } else {
1678 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001679 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001681 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001683 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001685 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686}
1687
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001688static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
1689 int data_size) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001691 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001692 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001693
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001694 peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001695 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001696 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001697
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001698 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001700 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701 /* corresponding dec_unacked() in e_end_resync_block()
1702 * respective _drbd_clear_done_ee */
1703
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001704 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001705
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001706 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001707 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001708 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001710 atomic_add(data_size >> 9, &device->rs_sect_ev);
1711 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001712 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001714 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001715 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001716 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001717 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001718 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001719
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001720 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001721fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001723 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001724}
1725
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001726static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001727find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001728 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729{
1730 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001731
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001732 /* Request object according to our peer */
1733 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001734 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001735 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001736 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001737 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001738 (unsigned long)id, (unsigned long long)sector);
1739 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001740 return NULL;
1741}
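/* Illustrative note on the round-trip validated above: block_id is the
 * kernel pointer of our own request object, sent with the request and
 * echoed back verbatim by the peer.  The cast is only trusted once
 * drbd_contains_interval() confirms the tree really holds that object
 * at the given sector.  A typical call, as in receive_DataReply()
 * below: */
#if 0
	req = find_request(device, &device->read_requests, p->block_id,
			   sector, false, __func__);
	if (unlikely(!req))
		return -EIO;
#endif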
1742
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001743static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001745 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001746 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001747 struct drbd_request *req;
1748 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001749 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001750 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001751
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001752 peer_device = conn_peer_device(connection, pi->vnr);
1753 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001754 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001755 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756
1757 sector = be64_to_cpu(p->sector);
1758
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001759 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001760 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001761 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001762 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001763 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001764
Bart Van Assche24c48302011-05-21 18:32:29 +02001765 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766 * special casing it there for the various failure cases.
1767 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001768 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001769 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001770 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001771 /* else: nothing. handled from drbd_disconnect...
1772 * I don't think we may complete this just yet
1773 * in case we are "on-disconnect: freeze" */
1774
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001775 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776}
1777
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001778static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001780 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001781 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001783 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001784 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001785
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001786 peer_device = conn_peer_device(connection, pi->vnr);
1787 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001788 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001789 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790
1791 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001792 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001794 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 /* data is submitted to disk within recv_resync_read.
1796 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001797 * or in drbd_peer_request_endio. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001798 err = recv_resync_read(peer_device, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799 } else {
1800 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001801 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001803 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001805 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 }
1807
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001808 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001809
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001810 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811}
1812
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001813static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001814 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001816 struct drbd_interval *i;
1817 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001818
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001819 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001820 if (!i->local)
1821 continue;
1822 req = container_of(i, struct drbd_request, i);
1823 if (req->rq_state & RQ_LOCAL_PENDING ||
1824 !(req->rq_state & RQ_POSTPONED))
1825 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001826 /* as it is RQ_POSTPONED, this will cause it to
1827 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001828 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001829 }
1830}
1831
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001832/*
1833 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001834 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001835static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001836{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001837 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001838 container_of(w, struct drbd_peer_request, w);
1839 struct drbd_peer_device *peer_device = peer_req->peer_device;
1840 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001841 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001842 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843
Philipp Reisner303d1442011-04-13 16:24:47 -07001844 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001845 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001846 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1847 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001848 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001849 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001850 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001852 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001853 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001854 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 /* we expect it to be marked out of sync anyways...
1856 * maybe assert this? */
1857 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001858 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001859 }
1860 /* we delete from the conflict detection hash _after_ we sent out the
1861 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001862 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001863 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001864 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001865 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001866 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001867 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001868 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001869 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001870 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001872 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001874 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001875}
1876
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001877static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001878{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001879 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001880 container_of(w, struct drbd_peer_request, w);
1881 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001882 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001883
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001884 err = drbd_send_ack(peer_device, ack, peer_req);
1885 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001886
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001887 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888}
1889
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001890static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001891{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001892 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001893}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001894
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001895static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001896{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001897 struct drbd_peer_request *peer_req =
1898 container_of(w, struct drbd_peer_request, w);
1899 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001900
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001901 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001902 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001903}
1904
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001905static bool seq_greater(u32 a, u32 b)
1906{
1907 /*
1908 * We assume 32-bit wrap-around here.
1909 * For 24-bit wrap-around, we would have to shift:
1910 * a <<= 8; b <<= 8;
1911 */
1912 return (s32)a - (s32)b > 0;
1913}
1914
1915static u32 seq_max(u32 a, u32 b)
1916{
1917 return seq_greater(a, b) ? a : b;
1918}
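/* A worked example of the wrap-around comparison above:
 *   seq_greater(1, 0xffffffff)
 *     = (s32)1 - (s32)0xffffffff > 0
 *     = 1 - (-1) > 0
 *     = 2 > 0  -> true,
 * so sequence number 1 correctly counts as newer than 0xffffffff right
 * after a 32-bit wrap. */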
1919
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001920static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001921{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001922 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001923 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001924
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001925 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001926 spin_lock(&device->peer_seq_lock);
1927 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1928 device->peer_seq = newest_peer_seq;
1929 spin_unlock(&device->peer_seq_lock);
1930 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001931 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001932 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001933 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001934}
1935
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001936static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1937{
1938 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1939}
1940
1941/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001942static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001943{
1944 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001945	bool rv = false;
1946
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001947 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001948 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001949 if (overlaps(peer_req->i.sector, peer_req->i.size,
1950 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001951			rv = true;
1952 break;
1953 }
1954 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001955 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001956
1957 return rv;
1958}
1959
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960/* Called from receive_Data.
1961 * Synchronize packets on sock with packets on msock.
1962 *
1963 * This is here so that even when a P_DATA packet traveling via sock overtakes
1964 * an Ack packet traveling on msock, they are still processed in the order
1965 * they were sent.
1966 *
1967 * Note: we don't care for Ack packets overtaking P_DATA packets.
1968 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001969 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001970 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001971 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 * ourselves. Correctly handles 32bit wrap around.
1973 *
1974 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1975 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1976 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 1977 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1978 *
1979 * returns 0 if we may process the packet,
1980 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001981static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001983 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001984 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001985 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001986 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001987
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001988 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001989 return 0;
1990
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001991 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001992 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001993 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
1994 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001995 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001996 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001997
Philipp Reisnerb411b362009-09-25 16:07:19 -07001998 if (signal_pending(current)) {
1999 ret = -ERESTARTSYS;
2000 break;
2001 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002002
2003 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002004 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002005 rcu_read_unlock();
2006
2007 if (!tp)
2008 break;
2009
2010 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002011 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2012 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002013 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002014 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002015 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002016 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002017 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002018 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002020 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 break;
2022 }
2023 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002024 spin_unlock(&device->peer_seq_lock);
2025 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 return ret;
2027}
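/*
 * Example of the wait condition above (illustration only): with
 * device->peer_seq == 5, a P_DATA packet carrying peer_seq == 7
 * means packet 6 is still in flight on the msock: seq_greater(7 - 1, 5)
 * is true, so we sleep. Once packet 6 arrives and bumps
 * device->peer_seq to 6, the loop re-evaluates and lets packet 7
 * through.
 */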
2028
Lars Ellenberg688593c2010-11-17 22:25:03 +01002029/* see also bio_flags_to_wire():
 2030 * we need to semantically map bio (REQ_*) flags to data packet (DP_*) flags
 2031 * and back, since the peer may run a different kernel version. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002032static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002033{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002034 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2035 (dpf & DP_FUA ? REQ_FUA : 0) |
2036 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2037 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002038}
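/*
 * Illustration: wire_flags_to_bio(DP_RW_SYNC | DP_FUA) yields
 * REQ_SYNC | REQ_FUA. Unknown bits in dpf are simply ignored, so a
 * peer speaking a newer dialect cannot inject unexpected bio flags
 * here.
 */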
2039
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002040static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002041 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002043 struct drbd_interval *i;
2044
2045 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002046 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002047 struct drbd_request *req;
2048 struct bio_and_error m;
2049
2050 if (!i->local)
2051 continue;
2052 req = container_of(i, struct drbd_request, i);
2053 if (!(req->rq_state & RQ_POSTPONED))
2054 continue;
2055 req->rq_state &= ~RQ_POSTPONED;
2056 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002057 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002058 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002059 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002060 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002061 goto repeat;
2062 }
2063}
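/*
 * Note on the repeat pattern above: completing the master bio must
 * not happen under resource->req_lock, and dropping that lock
 * invalidates the interval walk, so the lookup restarts from
 * scratch after every completion.
 */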
2064
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002065static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002066 struct drbd_peer_request *peer_req)
2067{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002068 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002069 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002070 sector_t sector = peer_req->i.sector;
2071 const unsigned int size = peer_req->i.size;
2072 struct drbd_interval *i;
2073 bool equal;
2074 int err;
2075
2076 /*
2077 * Inserting the peer request into the write_requests tree will prevent
2078 * new conflicting local requests from being added.
2079 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002080 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002081
2082 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002083 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002084 if (i == &peer_req->i)
2085 continue;
2086
2087 if (!i->local) {
2088 /*
2089 * Our peer has sent a conflicting remote request; this
2090 * should not happen in a two-node setup. Wait for the
2091 * earlier peer request to complete.
2092 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002093 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002094 if (err)
2095 goto out;
2096 goto repeat;
2097 }
2098
2099 equal = i->sector == sector && i->size == size;
2100 if (resolve_conflicts) {
2101 /*
2102 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002103 * overlapping request, it can be considered overwritten
2104 * and thus superseded; otherwise, it will be retried
2105 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002106 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002107 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002108 (i->size >> 9) >= sector + (size >> 9);
2109
2110 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002111 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002112 "local=%llus +%u, remote=%llus +%u, "
2113 "assuming %s came first\n",
2114 (unsigned long long)i->sector, i->size,
2115 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002116 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002117
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002118 inc_unacked(device);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002119 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002120 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002121 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002122 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002123
2124 err = -ENOENT;
2125 goto out;
2126 } else {
2127 struct drbd_request *req =
2128 container_of(i, struct drbd_request, i);
2129
2130 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002131 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002132 "local=%llus +%u, remote=%llus +%u\n",
2133 (unsigned long long)i->sector, i->size,
2134 (unsigned long long)sector, size);
2135
2136 if (req->rq_state & RQ_LOCAL_PENDING ||
2137 !(req->rq_state & RQ_POSTPONED)) {
2138 /*
2139 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002140 * decide if this request has been superseded
2141 * or needs to be retried.
2142 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002143 * disappear from the write_requests tree.
2144 *
2145 * In addition, wait for the conflicting
2146 * request to finish locally before submitting
2147 * the conflicting peer request.
2148 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002149 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002150 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002151 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002152 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002153 goto out;
2154 }
2155 goto repeat;
2156 }
2157 /*
2158 * Remember to restart the conflicting requests after
2159 * the new peer request has completed.
2160 */
2161 peer_req->flags |= EE_RESTART_REQUESTS;
2162 }
2163 }
2164 err = 0;
2165
2166 out:
2167 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002168 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002169 return err;
2170}
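/*
 * Containment example for the "superseded" test above (illustration
 * only): a local request at sector 0 covering 8192 bytes spans
 * sectors 0..15; a conflicting peer request at sector 4 covering
 * 2048 bytes spans sectors 4..7. Since 0 <= 4 and 0 + 16 >= 4 + 4,
 * the peer request is fully contained and gets acked as superseded
 * instead of being submitted.
 */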
2171
Philipp Reisnerb411b362009-09-25 16:07:19 -07002172/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002173static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002175 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002176 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002177 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002178 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002179 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002180 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181 int rw = WRITE;
2182 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002183 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002184
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002185 peer_device = conn_peer_device(connection, pi->vnr);
2186 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002187 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002188 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002190 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002191 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002192
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002193 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2194 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002195 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002196 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002197 if (!err)
2198 err = err2;
2199 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002200 }
2201
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002202 /*
2203 * Corresponding put_ldev done either below (on various errors), or in
2204 * drbd_peer_request_endio, if we successfully submit the data at the
2205 * end of this function.
2206 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002207
2208 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002209 peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002210 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002211 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002212 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213 }
2214
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002215 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216
Lars Ellenberg688593c2010-11-17 22:25:03 +01002217 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002218 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002219 if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002220 D_ASSERT(device, peer_req->i.size == 0);
2221 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002222 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002223
2224 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002225 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002226
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002227 spin_lock(&connection->epoch_lock);
2228 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002229 atomic_inc(&peer_req->epoch->epoch_size);
2230 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002231 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002232
Philipp Reisner302bdea2011-04-21 11:36:49 +02002233 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002234 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002235 rcu_read_unlock();
2236 if (tp) {
2237 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002238 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002239 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002241 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002242 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002243 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002244 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002245 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002246 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002247 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002249 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002251 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002252 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002253 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002254 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002255 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002256 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002257
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002258 if (device->state.conn == C_SYNC_TARGET)
2259 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002260
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002261 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002262 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002263 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002264 case DRBD_PROT_C:
2265 dp_flags |= DP_SEND_WRITE_ACK;
2266 break;
2267 case DRBD_PROT_B:
2268 dp_flags |= DP_SEND_RECEIVE_ACK;
2269 break;
2270 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002271 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002272 }
2273
2274 if (dp_flags & DP_SEND_WRITE_ACK) {
2275 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002276 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002277 /* corresponding dec_unacked() in e_end_block()
2278 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002279 }
2280
2281 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282 /* I really don't like it that the receiver thread
 2283 * sends on the msock, but anyway */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002284 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 }
2286
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002287 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 /* In case we have the only disk of the cluster: mark the block out of sync on the peer and cover the write with the activity log. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002289 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002290 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2291 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002292 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293 }
2294
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002295 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002296 if (!err)
2297 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002298
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002299 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002300 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002301 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002302 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002303 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002304 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002305 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002306 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002307
Philipp Reisnerb411b362009-09-25 16:07:19 -07002308out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002309 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002310 put_ldev(device);
2311 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002312 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002313}
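/*
 * Ack policy recap for receive_Data() (derived from the code above):
 * before protocol 100 the ack mode follows the configured wire
 * protocol - DRBD_PROT_C implies DP_SEND_WRITE_ACK, DRBD_PROT_B
 * implies DP_SEND_RECEIVE_ACK, DRBD_PROT_A neither; from protocol
 * 100 on, the sender encodes the desired ack directly in dp_flags.
 */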
2314
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002315/* We may throttle resync if the lower device seems to be busy
 2316 * and the current sync rate is above c_min_rate.
 2317 *
 2318 * To decide whether or not the lower device is busy, we use a scheme similar
 2319 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 2320 * activity (more than 64 sectors) that we cannot account for with our own
 2321 * resync activity, the device obviously is "busy".
2322 *
2323 * The current sync rate used here uses only the most recent two step marks,
2324 * to have a short time average so we can react faster.
2325 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002326bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2327{
2328 struct lc_element *tmp;
2329 bool throttle = true;
2330
2331 if (!drbd_rs_c_min_rate_throttle(device))
2332 return false;
2333
2334 spin_lock_irq(&device->al_lock);
2335 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2336 if (tmp) {
2337 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2338 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2339 throttle = false;
2340 /* Do not slow down if app IO is already waiting for this extent */
2341 }
2342 spin_unlock_irq(&device->al_lock);
2343
2344 return throttle;
2345}
2346
2347bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002348{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002349 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002350 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002351 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002352 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002353
2354 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002355 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002356 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002357
2358 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002359 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002360 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002361
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002362 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2363 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002364 atomic_read(&device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002365 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002366 unsigned long rs_left;
2367 int i;
2368
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002369 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002370
2371 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2372 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002373 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002374
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002375 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2376 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002377 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002378 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002379
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002380 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002381 if (!dt)
2382 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002383 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002384 dbdt = Bit2KB(db/dt);
2385
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002386 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002387 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002388 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002389 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002390}
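/*
 * Worked example (illustration only; assumes the usual 4 KiB per
 * bitmap bit and a c_min_rate default of 250 KiB/s): if the two most
 * recent sync marks show db = 2000 bitmap bits cleared within
 * dt = 2 seconds, then dbdt = Bit2KB(2000 / 2) = 1000 * 4 =
 * 4000 KiB/s. That exceeds c_min_rate, so the backing device counts
 * as busy and the resync request is throttled.
 */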
2391
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002392static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002393{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002394 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002395 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002396 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002397 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002398 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002399 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002400 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002401 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002402 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002403
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002404 peer_device = conn_peer_device(connection, pi->vnr);
2405 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002406 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002407 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002408 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002409
2410 sector = be64_to_cpu(p->sector);
2411 size = be32_to_cpu(p->blksize);
2412
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002413 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002414 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002415 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002416 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002417 }
2418 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002419 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002420 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002421 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002422 }
2423
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002424 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002425 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002426 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002427 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002428 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002429 break;
2430 case P_RS_DATA_REQUEST:
2431 case P_CSUM_RS_REQUEST:
2432 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002433 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002434 break;
2435 case P_OV_REPLY:
2436 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002437 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002438 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002439 break;
2440 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002441 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002442 }
2443 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002444 drbd_err(device, "Cannot satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002445 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002446
Lars Ellenberga821cc42010-09-06 12:31:37 +02002447 /* drain possible payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002448 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002449 }
2450
2451 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2452 * "criss-cross" setup, that might cause write-out on some other DRBD,
2453 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002454 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002455 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002456 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002457 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 }
2459
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002460 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002462 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002463 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002464 /* application IO, don't drbd_rs_begin_io */
2465 goto submit;
2466
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002468 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002469 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002470 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002471 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002472 break;
2473
2474 case P_OV_REPLY:
2475 case P_CSUM_RS_REQUEST:
2476 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002477 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002478 if (!di)
2479 goto out_free_e;
2480
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002481 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002482 di->digest = (((char *)di)+sizeof(struct digest_info));
2483
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002484 peer_req->digest = di;
2485 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002486
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002487 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002488 goto out_free_e;
2489
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002490 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002491 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002492 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002493 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002494 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002495 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002496 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002497 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002498 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002499 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002500 /* drbd_rs_begin_io done when we sent this request,
2501 * but accounting still needs to be done. */
2502 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002503 }
2504 break;
2505
2506 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002507 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002508 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002509 unsigned long now = jiffies;
2510 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002511 device->ov_start_sector = sector;
2512 device->ov_position = sector;
2513 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2514 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002515 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002516 device->rs_mark_left[i] = device->ov_left;
2517 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002518 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002519 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002520 (unsigned long long)sector);
2521 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002522 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002523 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002524 break;
2525
Philipp Reisnerb411b362009-09-25 16:07:19 -07002526 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002527 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002528 }
2529
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002530 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2531 * wrt the receiver, but it is not as straightforward as it may seem.
2532 * Various places in the resync start and stop logic assume resync
 2533 * requests are processed in order; requeuing this on the worker thread
 2534 * would introduce a bunch of new code for synchronization between threads.
2535 *
2536 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2537 * "forever", throttling after drbd_rs_begin_io will lock that extent
2538 * for application writes for the same time. For now, just throttle
2539 * here, where the rest of the code expects the receiver to sleep for
 2540 * a while, anyway.
2541 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002542
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002543 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2544 * this defers syncer requests for some time, before letting at least
 2545 * one request through. The resync controller on the receiving side
2546 * will adapt to the incoming rate accordingly.
2547 *
2548 * We cannot throttle here if remote is Primary/SyncTarget:
2549 * we would also throttle its application reads.
2550 * In that case, throttling is done on the SyncTarget only.
2551 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002552 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002553 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002554 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002555 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002556
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002557submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002558 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002559
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002560submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002561 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002562 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002563 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002564 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002565
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002566 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002567 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002568
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002569 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002570 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002571 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002572 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002573 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002574 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2575
Philipp Reisnerb411b362009-09-25 16:07:19 -07002576out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002577 put_ldev(device);
2578 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002579 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002580}
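/*
 * Recap of the exit paths above: "submit" is the common path for
 * application reads (no resync accounting), "submit_for_resync"
 * additionally adds the request size to rs_sect_ev before falling
 * through to "submit", and "out_free_e" drops the peer request and
 * the local disk reference on any error.
 */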
2581
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002582/**
2583 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2584 */
2585static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002586{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002587 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002588 int self, peer, rv = -100;
2589 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002590 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002591
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002592 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2593 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002594
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002595 ch_peer = device->p_uuid[UI_SIZE];
2596 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002597
Philipp Reisner44ed1672011-04-19 17:10:19 +02002598 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002599 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002600 rcu_read_unlock();
2601 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002602 case ASB_CONSENSUS:
2603 case ASB_DISCARD_SECONDARY:
2604 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002605 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002606 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002607 break;
2608 case ASB_DISCONNECT:
2609 break;
2610 case ASB_DISCARD_YOUNGER_PRI:
2611 if (self == 0 && peer == 1) {
2612 rv = -1;
2613 break;
2614 }
2615 if (self == 1 && peer == 0) {
2616 rv = 1;
2617 break;
2618 }
2619 /* Else fall through to one of the other strategies... */
2620 case ASB_DISCARD_OLDER_PRI:
2621 if (self == 0 && peer == 1) {
2622 rv = 1;
2623 break;
2624 }
2625 if (self == 1 && peer == 0) {
2626 rv = -1;
2627 break;
2628 }
2629 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002630 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002631 "Using discard-least-changes instead\n");
2632 case ASB_DISCARD_ZERO_CHG:
2633 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002634 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002635 ? -1 : 1;
2636 break;
2637 } else {
2638 if (ch_peer == 0) { rv = 1; break; }
2639 if (ch_self == 0) { rv = -1; break; }
2640 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002641 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002642 break;
2643 case ASB_DISCARD_LEAST_CHG:
2644 if (ch_self < ch_peer)
2645 rv = -1;
2646 else if (ch_self > ch_peer)
2647 rv = 1;
2648 else /* ( ch_self == ch_peer ) */
2649 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002650 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002651 ? -1 : 1;
2652 break;
2653 case ASB_DISCARD_LOCAL:
2654 rv = -1;
2655 break;
2656 case ASB_DISCARD_REMOTE:
2657 rv = 1;
2658 }
2659
2660 return rv;
2661}
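/*
 * Return convention shared by the drbd_asb_recover_*p() helpers
 * (see the rule table before drbd_uuid_compare()): rv = 1 makes the
 * local node sync source, rv = -1 makes it sync target (discarding
 * its changes), and rv = -100 means the policy reached no decision.
 */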
2662
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002663/**
2664 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2665 */
2666static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002667{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002668 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002669 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002670 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671
Philipp Reisner44ed1672011-04-19 17:10:19 +02002672 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002673 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002674 rcu_read_unlock();
2675 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002676 case ASB_DISCARD_YOUNGER_PRI:
2677 case ASB_DISCARD_OLDER_PRI:
2678 case ASB_DISCARD_LEAST_CHG:
2679 case ASB_DISCARD_LOCAL:
2680 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002681 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002682 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002683 break;
2684 case ASB_DISCONNECT:
2685 break;
2686 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002687 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002688 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002689 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002690 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002691 rv = hg;
2692 break;
2693 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002694 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002695 break;
2696 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002697 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002698 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002699 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002700 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002701 enum drbd_state_rv rv2;
2702
Philipp Reisnerb411b362009-09-25 16:07:19 -07002703 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2704 * we might be here in C_WF_REPORT_PARAMS which is transient.
2705 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002706 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002707 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002708 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002709 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002710 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002711 rv = hg;
2712 }
2713 } else
2714 rv = hg;
2715 }
2716
2717 return rv;
2718}
2719
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002720/**
2721 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2722 */
2723static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002724{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002725 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002726 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002727 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728
Philipp Reisner44ed1672011-04-19 17:10:19 +02002729 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002730 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002731 rcu_read_unlock();
2732 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002733 case ASB_DISCARD_YOUNGER_PRI:
2734 case ASB_DISCARD_OLDER_PRI:
2735 case ASB_DISCARD_LEAST_CHG:
2736 case ASB_DISCARD_LOCAL:
2737 case ASB_DISCARD_REMOTE:
2738 case ASB_CONSENSUS:
2739 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002740 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002741 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002742 break;
2743 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002744 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745 break;
2746 case ASB_DISCONNECT:
2747 break;
2748 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002749 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002750 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002751 enum drbd_state_rv rv2;
2752
Philipp Reisnerb411b362009-09-25 16:07:19 -07002753 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2754 * we might be here in C_WF_REPORT_PARAMS which is transient.
2755 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002756 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002757 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002758 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002760 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002761 rv = hg;
2762 }
2763 } else
2764 rv = hg;
2765 }
2766
2767 return rv;
2768}
2769
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002770static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771 u64 bits, u64 flags)
2772{
2773 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002774 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 return;
2776 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002777 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002778 text,
2779 (unsigned long long)uuid[UI_CURRENT],
2780 (unsigned long long)uuid[UI_BITMAP],
2781 (unsigned long long)uuid[UI_HISTORY_START],
2782 (unsigned long long)uuid[UI_HISTORY_END],
2783 (unsigned long long)bits,
2784 (unsigned long long)flags);
2785}
2786
2787/*
2788 100 after split brain try auto recover
2789 2 C_SYNC_SOURCE set BitMap
2790 1 C_SYNC_SOURCE use BitMap
2791 0 no Sync
2792 -1 C_SYNC_TARGET use BitMap
2793 -2 C_SYNC_TARGET set BitMap
2794 -100 after split brain, disconnect
2795-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002796-1091 requires proto 91
2797-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002798 */
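/* The rule number returned via *rule_nr identifies which comparison
 * matched; it is purely a diagnostic aid for the handshake log. */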
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002799static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002800{
2801 u64 self, peer;
2802 int i, j;
2803
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002804 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2805 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002806
2807 *rule_nr = 10;
2808 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2809 return 0;
2810
2811 *rule_nr = 20;
2812 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2813 peer != UUID_JUST_CREATED)
2814 return -2;
2815
2816 *rule_nr = 30;
2817 if (self != UUID_JUST_CREATED &&
2818 (peer == UUID_JUST_CREATED || peer == (u64)0))
2819 return 2;
2820
2821 if (self == peer) {
2822 int rct, dc; /* roles at crash time */
2823
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002824 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002825
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002826 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002827 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002828
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002829 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2830 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002831 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002832 drbd_uuid_move_history(device);
2833 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2834 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002835
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002836 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2837 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002838 *rule_nr = 34;
2839 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002840 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002841 *rule_nr = 36;
2842 }
2843
2844 return 1;
2845 }
2846
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002847 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002849 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002850 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002851
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002852 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2853 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002854 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002855
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002856 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2857 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2858 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002859
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002860 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002861 *rule_nr = 35;
2862 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002863 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002864 *rule_nr = 37;
2865 }
2866
2867 return -1;
2868 }
2869
2870 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002871 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2872 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 /* lowest bit is set when we were primary,
2874 * next bit (weight 2) is set when peer was primary */
2875 *rule_nr = 40;
2876
2877 switch (rct) {
2878 case 0: /* !self_pri && !peer_pri */ return 0;
2879 case 1: /* self_pri && !peer_pri */ return 1;
2880 case 2: /* !self_pri && peer_pri */ return -1;
2881 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002882 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002883 return dc ? -1 : 1;
2884 }
2885 }
2886
2887 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002888 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889 if (self == peer)
2890 return -1;
2891
2892 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002893 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002895 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002896 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2897 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2898 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899 /* The last P_SYNC_UUID did not get through. Undo the last
 2900 start-of-resync-as-sync-source modifications of the peer's UUIDs. */
2901
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002902 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002903 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002904
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002905 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2906 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002907
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002908 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002909 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002910
Philipp Reisnerb411b362009-09-25 16:07:19 -07002911 return -1;
2912 }
2913 }
2914
2915 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002916 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002917 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002918 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919 if (self == peer)
2920 return -2;
2921 }
2922
2923 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002924 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2925 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002926 if (self == peer)
2927 return 1;
2928
2929 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002930 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002931 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002932 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002933 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2934 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2935 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002936 /* The last P_SYNC_UUID did not get through. Undo the last
 2937 start-of-resync-as-sync-source modifications of our UUIDs. */
2938
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002939 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002940 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002941
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002942 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2943 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002944
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002945 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002946 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2947 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002948
2949 return 1;
2950 }
2951 }
2952
2953
2954 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002955 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002956 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002957 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958 if (self == peer)
2959 return 2;
2960 }
2961
2962 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002963 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2964 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002965 if (self == peer && self != ((u64)0))
2966 return 100;
2967
2968 *rule_nr = 100;
2969 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002970 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002971 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002972 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002973 if (self == peer)
2974 return -100;
2975 }
2976 }
2977
2978 return -1000;
2979}
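
/*
 * Illustration (editor's sketch, not upstream code): bit 0 of each UUID
 * is reserved as a flag bit and is not part of the data-generation
 * identity, which is why every comparison above masks it off first:
 *
 *	u64 self = 0x45F6A3C2D0059A31ULL;	// flag bit set
 *	u64 peer = 0x45F6A3C2D0059A30ULL;	// flag bit clear
 *	(self & ~(u64)1) == (peer & ~(u64)1)	// true: same generation
 *
 * Return values, as consumed by drbd_sync_handshake() below:
 *	0: in sync;  1/-1: bitmap-based sync as source/target;
 *	2/-2: full sync as source/target;  100/-100: split brain;
 *	-1000: unrelated data;  < -1000: peer needs at least protocol
 *	version -(rv + 1000), e.g. -1091 => apv 91.
 */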

/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK (-1) on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
				  "automatically solved. Sync from %s node\n",
				  pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
					  " UUIDs were ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
				  "Sync from %s node\n",
				  (hg < 0) ? "peer" : "this");
	}
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
				  " assumption\n");
		}
	}

	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
				  drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				  abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}
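
/*
 * Summary (editor's sketch, not upstream code) of how the hg value
 * computed above maps to the connection state this function requests:
 *
 *	hg == 0  -> C_CONNECTED    (warn if bitmap bits are still set)
 *	hg >  0  -> C_WF_BITMAP_S  (we become sync source)
 *	hg <  0  -> C_WF_BITMAP_T  (we become sync target)
 *	|hg| >= 2 additionally pre-sets the whole bitmap: full sync
 *	unresolved split brain, dry-run, or unrelated data -> C_MASK,
 *	which makes the caller drop the connection.
 */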

static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
{
	/* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
	if (peer == ASB_DISCARD_REMOTE)
		return ASB_DISCARD_LOCAL;

	/* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
	if (peer == ASB_DISCARD_LOCAL)
		return ASB_DISCARD_REMOTE;

	/* everything else is valid if they are equal on both sides. */
	return peer;
}
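
/*
 * Illustration (editor's sketch, not upstream code): the peer reports
 * its after-split-brain policies from its own point of view, so the two
 * "discard" values must be mirrored before comparing with ours.  If the
 * peer is configured with ASB_DISCARD_REMOTE, its "remote" is this
 * node, i.e.:
 *
 *	convert_after_sb(ASB_DISCARD_REMOTE) == ASB_DISCARD_LOCAL
 *
 * receive_protocol() below relies on this when checking that both sides
 * agree on after-sb-0pri/-1pri/-2pri.
 */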

static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
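
/*
 * Note (editor's sketch, not upstream code): the net_conf switch above
 * is the usual RCU copy/update sequence, roughly:
 *
 *	new = kmalloc(sizeof(*new), GFP_KERNEL);  // private copy
 *	*new = *old;                              // inherit settings
 *	new->field = ...;                         // apply changes
 *	rcu_assign_pointer(ptr, new);             // publish to readers
 *	synchronize_rcu();                        // wait out old readers
 *	kfree(old);                               // now safe to free
 *
 * Readers, like the compatibility checks earlier in this function, only
 * take rcu_read_lock() and rcu_dereference().
 */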

/* helper function
 * input: alg name, feature name
 * return: NULL (alg name was "")
 *         ERR_PTR(error) if something goes wrong
 *         or the crypto hash ptr, if it worked out ok. */
static
struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
		const char *alg, const char *name)
{
	struct crypto_hash *tfm;

	if (!alg[0])
		return NULL;

	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		drbd_err(device, "Cannot allocate \"%s\" as %s (reason: %ld)\n",
			 alg, name, PTR_ERR(tfm));
		return tfm;
	}
	return tfm;
}
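
/*
 * Usage sketch (editor's note, not upstream code) for the three-way
 * return convention above; this mirrors the caller pattern in
 * receive_SyncParam():
 *
 *	tfm = drbd_crypto_alloc_digest_safe(device, alg, "verify-alg");
 *	if (IS_ERR(tfm))	// allocation failed, error already logged
 *		goto disconnect;
 *	if (!tfm)		// alg was "", the feature stays disabled
 *		;		// continue without a digest
 */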

static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int size = pi->size;

	while (size) {
		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
		s = drbd_recv(connection, buffer, s);
		if (s <= 0) {
			if (s < 0)
				return s;
			break;
		}
		size -= s;
	}
	if (size)
		return -EIO;
	return 0;
}
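
/*
 * Illustration (editor's sketch, not upstream code): the drain loop
 * above keeps the TCP stream aligned on packet boundaries even when the
 * payload is unusable.  E.g. for pi->size == 10000 and a hypothetical
 * 4096 byte receive buffer it reads 4096 + 4096 + 1808 bytes and
 * returns 0, so the next packet header is parsed from the right offset.
 */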

/*
 * config_unknown_volume  -  device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	return ignore_remaining_packet(connection, pi);
}

static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
			 pi->size, exp_max_sz);
		return -EIO;
	}

	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					 "peer wants %u, accepting only up to %u bytes\n",
					 data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94 && new_disk_conf) {
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed\n");
					put_ldev(device);
					goto disconnect;
				}
			}
		}
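
		/*
		 * Worked example (editor's sketch, not upstream code),
		 * assuming SLEEP_TIME is HZ/10 as defined in drbd_int.h:
		 * c_plan_ahead is configured in 0.1 second units, so the
		 * fifo gets one slot per resync planning step:
		 *
		 *	c_plan_ahead = 20   (plan 2.0 seconds ahead)
		 *	fifo_size = (20 * 10 * (HZ/10)) / HZ = 20 slots
		 *
		 * The plan buffer is only reallocated when that size
		 * actually changes.
		 */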

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_hash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_hash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_device *device,
	const char *s, sector_t a, sector_t b)
{
	sector_t d;
	if (a == 0 || b == 0)
		return;
	d = (a > b) ? (a - b) : (b - a);
	if (d > (a>>3) || d > (b>>3))
		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
			  (unsigned long long)a, (unsigned long long)b);
}
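
/*
 * Worked example (editor's sketch, not upstream code): a>>3 is a/8,
 * i.e. 12.5%.  For a = 1000 and b = 1200 sectors, d = 200 exceeds
 * 1000>>3 = 125, so the warning fires; for b = 1100, d = 100 is below
 * both 125 and 1100>>3 = 137, so it stays quiet.
 */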

static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);
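
		/*
		 * Illustration (editor's sketch, not upstream code):
		 * min_not_zero() treats 0 as "no explicit user size".
		 * With my_usize = 0 and p_usize = 1048576 the result is
		 * 1048576; with my_usize = 2097152 it is the smaller
		 * 1048576.  The minimum is only enforced during the
		 * initial parameter exchange; online resizing while
		 * connected is handled by the checks that follow.
		 */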

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
		    drbd_get_capacity(device->this_bdev) &&
		    device->state.disk >= D_OUTDATED &&
		    device->state.conn < C_CONNECTED) {
			drbd_err(device, "The peer's disk size is too small!\n");
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors\n",
				  (unsigned long)p_usize);
		}

		put_ldev(device);
	}

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(device, p_size);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(device);

	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}

static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_uuids *p = pi->data;
	u64 *p_uuid;
	int i, updated_uuids = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
	if (!p_uuid) {
		drbd_err(device, "kmalloc of p_uuid failed\n");
		return -ENOMEM;
	}

	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
		p_uuid[i] = be64_to_cpu(p->uuid[i]);

	kfree(device->p_uuid);
	device->p_uuid = p_uuid;

	if (device->state.conn < C_CONNECTED &&
	    device->state.disk < D_INCONSISTENT &&
	    device->state.role == R_PRIMARY &&
	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
			 (unsigned long long)device->ed_uuid);
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (get_ldev(device)) {
		int skip_initial_sync =
			device->state.conn == C_CONNECTED &&
			peer_device->connection->agreed_pro_version >= 90 &&
			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
			(p_uuid[UI_FLAGS] & 8);
		if (skip_initial_sync) {
			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
					"clear_n_write from receive_uuids",
					BM_LOCKED_TEST_ALLOWED);
			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
			_drbd_uuid_set(device, UI_BITMAP, 0);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			drbd_md_sync(device);
			updated_uuids = 1;
		}
		put_ldev(device);
	} else if (device->state.disk < D_INCONSISTENT &&
		   device->state.role == R_PRIMARY) {
		/* I am a diskless primary, the peer just created a new current UUID
		   for me. */
		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
	}

	/* Before we test for the disk state, we should wait until a possibly
	   ongoing cluster-wide state change has finished.  That is important
	   if we are primary and are detaching from our disk.  We need to see
	   the new disk state... */
	mutex_lock(device->state_mutex);
	mutex_unlock(device->state_mutex);
	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);

	if (updated_uuids)
		drbd_print_uuids(device, "receiver updated UUIDs to");

	return 0;
}

/**
 * convert_state() - Converts the peer's view of the cluster state to our point of view
 * @ps: The state as seen by the peer.
 */
static union drbd_state convert_state(union drbd_state ps)
{
	union drbd_state ms;

	static enum drbd_conns c_tab[] = {
		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
		[C_CONNECTED] = C_CONNECTED,

		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
		[C_VERIFY_S] = C_VERIFY_T,
		[C_MASK] = C_MASK,
	};

	ms.i = ps.i;

	ms.conn = c_tab[ps.conn];
	ms.peer = ps.role;
	ms.role = ps.peer;
	ms.pdsk = ps.disk;
	ms.disk = ps.pdsk;
	ms.peer_isp = (ps.aftr_isp | ps.user_isp);

	return ms;
}
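
/*
 * Illustration (editor's sketch, not upstream code): the peer reports
 * state from its own point of view, so role/peer and disk/pdsk swap and
 * asymmetric connection states are mirrored.  If the peer sends
 * { conn = C_STARTING_SYNC_S, role = R_PRIMARY, disk = D_UP_TO_DATE,
 *   pdsk = D_INCONSISTENT }, convert_state() yields
 * { conn = C_STARTING_SYNC_T, peer = R_PRIMARY, pdsk = D_UP_TO_DATE,
 *   disk = D_INCONSISTENT }: the same situation seen from this node.
 */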
3819
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003820static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003822 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003823 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003824 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003826 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003828 peer_device = conn_peer_device(connection, pi->vnr);
3829 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003830 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003831 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003832
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833 mask.i = be32_to_cpu(p->mask);
3834 val.i = be32_to_cpu(p->val);
3835
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003836 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003837 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003838 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003839 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003840 }
3841
3842 mask = convert_state(mask);
3843 val = convert_state(val);
3844
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003845 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003846 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003848 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003850 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003851}
3852
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003853static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003854{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003855 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003856 union drbd_state mask, val;
3857 enum drbd_state_rv rv;
3858
3859 mask.i = be32_to_cpu(p->mask);
3860 val.i = be32_to_cpu(p->val);
3861
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003862 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3863 mutex_is_locked(&connection->cstate_mutex)) {
3864 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003865 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003866 }
3867
3868 mask = convert_state(mask);
3869 val = convert_state(val);
3870
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003871 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3872 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003873
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003874 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003875}
3876
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporary network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}

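/*
 * Concurrency note on receive_state() above: the os/ns snapshot is taken
 * under req_lock but the lock is dropped while the handshake is evaluated.
 * Before committing with _drbd_set_state() we re-check
 * (os.i != drbd_read_state(device).i) and restart from "retry:" if some
 * other context changed the state in between; a small optimistic retry
 * loop instead of holding req_lock across the whole function.
 */
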
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}

/**
 * receive_bitmap_plain
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}

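/*
 * Rough sizing sketch for receive_bitmap_plain() (numbers are only
 * illustrative, they depend on DRBD_SOCKET_BUFFER_SIZE and the header
 * size negotiated for this connection): assuming a 4096 byte socket
 * buffer and a 16 byte header, data_size is 4080 bytes, i.e. up to 510
 * longs per P_BITMAP packet on a 64 bit host; the last packet of a
 * transfer is shorter, covering only the remaining bm_words.
 */
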
static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
{
	return (enum drbd_bitmap_code)(p->encoding & 0x0f);
}

static int dcbp_get_start(struct p_compressed_bm *p)
{
	return (p->encoding & 0x80) != 0;
}

static int dcbp_get_pad_bits(struct p_compressed_bm *p)
{
	return (p->encoding >> 4) & 0x7;
}

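/*
 * The accessors above imply the following layout of the p->encoding
 * byte of a P_COMPRESSED_BITMAP packet:
 *
 *   bit 7    : value of the first run (does the RLE stream start with
 *              a run of set bits?)
 *   bits 6..4: number of padding bits at the end of the bit stream
 *   bits 3..0: encoding variant; currently only RLE_VLI_Bits is decoded
 */
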
/**
 * recv_bm_rle_bits
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		 struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;
	u64 rl;
	u64 tmp;
	unsigned long s = c->bit_offset;
	unsigned long e;
	int toggle = dcbp_get_start(p);
	int have;
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl - 1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				 have, bits, look_ahead,
				 (unsigned int)(bs.cur.b - p->code),
				 (unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}

/**
 * decode_bitmap_c
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
decode_bitmap_c(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		struct bm_xfer_ctx *c,
		unsigned int len)
{
	if (dcbp_get_code(p) == RLE_VLI_Bits)
		return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));

	/* other variants had been implemented for evaluation,
	 * but have been dropped as this one turned out to be "best"
	 * during all our tests. */

	drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
	conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
	return -EIO;
}

void INFO_bm_xfer_stats(struct drbd_device *device,
		const char *direction, struct bm_xfer_ctx *c)
{
	/* what would it take to transfer it "plaintext" */
	unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
	unsigned int plain =
		header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
		c->bm_words * sizeof(unsigned long);
	unsigned int total = c->bytes[0] + c->bytes[1];
	unsigned int r;

	/* total cannot be zero. but just in case: */
	if (total == 0)
		return;

	/* don't report if not compressed */
	if (total >= plain)
		return;

	/* total < plain. check for overflow, still */
	r = (total > UINT_MAX/1000) ? (total / (plain/1000))
				    : (1000 * total / plain);

	if (r > 1000)
		r = 1000;

	r = 1000 - r;
	drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
		  "total %u; compression: %u.%u%%\n",
		  direction,
		  c->bytes[1], c->packets[1],
		  c->bytes[0], c->packets[0],
		  total, r/10, r % 10);
}

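/*
 * Example of the ratio math above (illustrative numbers): with
 * plain = 1048576 bytes and total = 52428 bytes on the wire,
 * r = 1000 * 52428 / 1048576 = 49, so r becomes 1000 - 49 = 951 and
 * the log line reads "compression: 95.1%".
 */
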
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit or 64 bit chunks,
   as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we used big endian,
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	for (;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
			  drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}

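/*
 * After a successful bitmap exchange the two sides leave receive_bitmap()
 * asymmetrically: a node in C_WF_BITMAP_T sends its own bitmap back and
 * requests C_WF_SYNC_UUID (it is about to become sync target, see
 * receive_sync_uuid() above), while a node still in C_WF_BITMAP_S starts
 * the resync as sync source once the receive loop is done.
 */
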
static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		  pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}

static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}

static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_desc *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	switch (device->state.conn) {
	case C_WF_SYNC_UUID:
	case C_WF_BITMAP_T:
	case C_BEHIND:
		break;
	default:
		drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
			 drbd_conn_str(device->state.conn));
	}

	drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));

	return 0;
}

struct data_cmd {
	int expect_payload;
	size_t pkt_size;
	int (*fn)(struct drbd_connection *, struct packet_info *);
};

static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	      = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	      = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]     = { 1, sizeof(struct p_data), receive_RSDataReply },
	[P_BARRIER]	      = { 0, sizeof(struct p_barrier), receive_Barrier },
	[P_BITMAP]	      = { 1, 0, receive_bitmap },
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap },
	[P_UNPLUG_REMOTE]     = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST]   = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	      = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]      = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	      = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	      = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	      = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	      = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]     = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	      = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	      = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST]   = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]	      = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]	      = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE]   = { 1, sizeof(struct p_protocol), receive_protocol },
};

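/*
 * Dispatch convention for the table above, as consumed by drbdd() below:
 * pkt_size is the fixed part of each packet that is read into the
 * preallocated socket buffer (pi.data) before fn is called (0 means the
 * command consists of the header only), and expect_payload marks
 * commands that may carry additional variable length data which the
 * handler reads from the socket itself.
 */
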
static void drbdd(struct drbd_connection *connection)
{
	struct packet_info pi;
	size_t shs; /* sub header size */
	int err;

	while (get_t_state(&connection->receiver) == RUNNING) {
		struct data_cmd *cmd;

		drbd_thread_current_set_cpu(&connection->receiver);
		if (drbd_recv_header(connection, &pi))
			goto err_out;

		cmd = &drbd_cmd_handler[pi.cmd];
		if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
			drbd_err(connection, "Unexpected data packet %s (0x%04x)",
				 cmdname(pi.cmd), pi.cmd);
			goto err_out;
		}

		shs = cmd->pkt_size;
		if (pi.size > shs && !cmd->expect_payload) {
			drbd_err(connection, "No payload expected %s l:%d\n",
				 cmdname(pi.cmd), pi.size);
			goto err_out;
		}

		if (shs) {
			err = drbd_recv_all_warn(connection, pi.data, shs);
			if (err)
				goto err_out;
			pi.size -= shs;
		}

		err = cmd->fn(connection, &pi);
		if (err) {
			drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
				 cmdname(pi.cmd), err, pi.size);
			goto err_out;
		}
	}
	return;

 err_out:
	conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
}

static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->asender);
	drbd_free_sock(connection);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}

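/*
 * Teardown order sketch for conn_disconnect() above: force the
 * connection into C_NETWORK_FAILURE so drbd_make_request() knows the
 * link is gone, stop the asender and close the sockets, run the
 * per-volume cleanup in drbd_disconnected() for every peer device,
 * and finally step the cstate down to C_UNCONNECTED (or all the way
 * to C_STANDALONE when the admin asked for a disconnect).
 */
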
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have queued work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}

/*
 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
 * we can agree on is stored in agreed_pro_version.
 *
 * feature flags and the reserved array should be enough room for future
 * enhancements of the handshake protocol, and possible plugins...
 *
 * for now, they are expected to be zero, but ignored.
 */
static int drbd_send_features(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	struct p_connection_features *p;

	sock = &connection->data;
	p = conn_prepare_command(connection, sock);
	if (!p)
		return -EIO;
	memset(p, 0, sizeof(*p));
	p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
	p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
	return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	drbd_info(connection, "Handshake successful: "
		  "Agreed network protocol version %d\n", connection->agreed_pro_version);

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
		 "I support %d-%d, peer supports %d-%d\n",
		 PRO_VERSION_MIN, PRO_VERSION_MAX,
		 p->protocol_min, p->protocol_max);
	return -1;
}

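/*
 * Example of the negotiation above (version numbers are illustrative,
 * the real range depends on the build): if we support protocols 86..101
 * and the peer announces 86..96, the ranges overlap and each side
 * settles on min(PRO_VERSION_MAX, p->protocol_max) = 96.  A peer old
 * enough to send protocol_max == 0 is treated as supporting only its
 * protocol_min.
 */
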
#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN 64

/* Return value:
	1 - auth succeeded,
	0 - failed, try again (network error),
	-1 - auth failed, don't try again.
*/

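/*
 * Challenge-response outline, as implemented below: send a random
 * CHALLENGE_LEN byte challenge (P_AUTH_CHALLENGE), receive the peer's
 * challenge, answer with HMAC(shared_secret, peers_ch) in
 * P_AUTH_RESPONSE, then receive the peer's response and (in the part
 * of the function following this excerpt) compare it against the
 * expected HMAC over my_challenge.  Each side thus proves knowledge of
 * the shared secret without ever sending it over the wire.
 */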
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004731static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004733 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004734 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4735 struct scatterlist sg;
4736 char *response = NULL;
4737 char *right_response = NULL;
4738 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004739 unsigned int key_len;
4740 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741 unsigned int resp_size;
4742 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004743 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004744 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004745 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004746
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004747 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4748
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has wrong size.\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes of HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif

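/* Receiver thread main loop: (re)establish the connection, then process
 * the incoming data stream until the connection is lost.  conn_connect()
 * returning 0 means "failed, try again after a short sleep"; -1 means the
 * network configuration is to be discarded; > 0 means we are connected
 * and drbdd() takes over the packet stream. */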
int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

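/* Reply to a connection-wide state change request: record success or
 * failure in the connection flags and wake whoever waits on ping_wait
 * for the answer. */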
static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

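/* Reply to a per-device state change request.  Peers with an agreed
 * protocol version < 100 answer connection-wide requests through this
 * packet as well; the CONN_WD_ST_CHG_REQ flag tells us to redirect such
 * replies to got_conn_RqSReply(). */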
static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

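/* Keepalive handling on the meta socket: an incoming P_PING is answered
 * right away with P_PING_ACK; an incoming P_PING_ACK restores the idle
 * receive timeout and wakes anyone waiting for proof that the peer is
 * still alive. */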
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int * HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

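/* P_RS_IS_IN_SYNC (checksum based resync, protocol >= 89): the peer found
 * the block content identical, so mark it in sync without transferring
 * the data, and account it in the rs_same_csum statistics. */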
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

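/* Look up the request identified by (id, sector) in the given tree and
 * feed the event 'what' into the request state machine.  Takes and
 * releases the resource's req_lock itself; completes the master bio if
 * the state transition finished it.  Returns -EIO if the request is not
 * found and missing_ok is false. */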
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

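/* Positive acknowledgements for writes.  Acks for resync requests carry
 * ID_SYNCER as block_id and are handled directly; everything else is
 * translated into the matching request state machine event. */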
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

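/* The peer could not or did not write the block: mark it out of sync.
 * A missing request is tolerated here (missing_ok); see the comments in
 * the body for the protocol A/B races that make this legitimate. */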
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

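/* The peer could not satisfy a read request on our behalf. */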
static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

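/* The peer could not serve a resync read request (P_NEG_RS_DREPLY), or
 * cancelled it (P_RS_CANCEL); only the former counts as failed resync
 * I/O. */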
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

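/* The peer confirmed that it processed a whole epoch: release the
 * corresponding section of the transfer log.  If a device is in Ahead
 * mode with no application I/O in flight, arm the timer that will switch
 * it back to SyncSource. */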
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

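/* Online verify result for one block.  When the last outstanding block
 * has been answered, queue w_ov_finished on the sender work queue to
 * wrap up the verify run. */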
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.\n");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

/* P_DELAY_PROBE carries nothing the receiver needs to act on; consume
 * and ignore it. */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

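/* Flush the done_ee lists of all devices of this connection, i.e. send
 * the pending acknowledgements.  Loops until no device has completed
 * peer requests left; returns nonzero if processing them failed. */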
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}

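/* Dispatch table for packets received on the meta socket, indexed by
 * packet type.  pkt_size is the payload size expected after the header;
 * the handler is only called once the full packet has been received. */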
struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static struct asender_cmd asender_tbl[] = {
	[P_PING]	      = { 0, got_Ping },
	[P_PING_ACK]	      = { 0, got_PingAck },
	[P_RECV_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	      = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	      = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]     = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	      = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	      = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY]   = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]     = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]	      = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	      = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	      = { sizeof(struct p_block_ack), got_BlockAck },
};

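/* The asender thread: sends pings when asked to, flushes the pending
 * acknowledgements, and receives and dispatches the packets listed in
 * asender_tbl.  It runs with realtime priority (SCHED_RR) so that
 * acknowledgements keep flowing even under heavy load. */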
int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct net_conf *nc;
	int ping_timeo, tcp_cork, ping_int;
	struct sched_param param = { .sched_priority = 2 };

	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(connection->meta.socket, buf, expect - received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS  (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;

				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			cmd = &asender_tbl[pi.cmd];
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}