/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */

#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first unfinished one, we
	   can stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
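
/* Usage sketch (illustrative only): a receiver-side caller typically asks for
 * the page chain backing one peer request and blocks until the pool can
 * satisfy it:
 *
 *	unsigned int nr = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
 *	struct page *pages = drbd_alloc_pages(peer_device, nr, true);
 *
 * With retry == true, NULL is only returned if the caller was signalled.
 */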

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * It is also used from inside another spin_lock_irq(&resource->req_lock)
 * section.
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
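
/* Illustrative sketch (not part of the driver): the underscore-prefixed
 * variant in the first group assumes the caller already holds the req_lock,
 * while the plain variants take it themselves.  For example,
 *
 *	spin_lock_irq(&device->resource->req_lock);
 *	_drbd_wait_ee_list_empty(device, &device->active_ee);
 *	spin_unlock_irq(&device->resource->req_lock);
 *
 * is what drbd_wait_ee_list_empty(device, &device->active_ee) does for you.
 */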

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
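
/* Usage sketch (illustrative): since the sizes only take effect when applied
 * before connect(2)/listen(2), both connection paths below call this wrapper
 * right after creating the socket:
 *
 *	drbd_setbufsize(sock, nc->sndbuf_size, nc->rcvbuf_size);
 *	err = sock->ops->connect(sock, addr, addr_len, 0);
 *
 * A size of 0 means "leave the kernel's auto-tuned default in place".
 */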

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);
};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
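
/* The peek above is the liveness probe: rr > 0 means unread data is pending,
 * -EAGAIN means the connection is idle but alive, and anything else (0 for an
 * orderly shutdown, or another error) means the socket is dead.  A userspace
 * equivalent of the same probe (illustrative only):
 *
 *	char tb[4];
 *	ssize_t rr = recv(fd, tb, 4, MSG_DONTWAIT | MSG_PEEK);
 *	bool okay = rr > 0 || (rr < 0 && errno == EAGAIN);
 */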

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1095
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001096static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001098 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001099
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001100 if (header_size == sizeof(struct p_header100) &&
1101 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1102 struct p_header100 *h = header;
1103 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001104 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001105 return -EINVAL;
1106 }
1107 pi->vnr = be16_to_cpu(h->volume);
1108 pi->cmd = be16_to_cpu(h->command);
1109 pi->size = be32_to_cpu(h->length);
1110 } else if (header_size == sizeof(struct p_header95) &&
1111 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001112 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001114 pi->size = be32_to_cpu(h->length);
1115 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001116 } else if (header_size == sizeof(struct p_header80) &&
1117 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1118 struct p_header80 *h = header;
1119 pi->cmd = be16_to_cpu(h->command);
1120 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001121 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001122 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001123 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001124 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001125 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001126 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001128 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001129 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130}
1131
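Aside, a minimal user-space sketch of what decode_header() is parsing: the peers agree on one of three wire headers during the feature handshake, and the receive path distinguishes them by the agreed header size plus the magic in the first bytes. The struct layout below mirrors the 8-byte p_header80; the magic constant is quoted from include/linux/drbd.h of this era and should be verified against your tree, and the P_BARRIER command number is an assumption for illustration.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>		/* ntohl()/ntohs() */

#define DRBD_MAGIC 0x83740267U	/* 8 byte header, protocol <= 94 */

struct p_header80 {		/* 8 bytes on the wire, all big endian */
	uint32_t magic;
	uint16_t command;
	uint16_t length;	/* payload bytes following the header */
};

/* 0 on success, -1 on bad magic -- mirroring the -EINVAL path above */
static int decode_header80(const void *header, uint16_t *cmd, uint32_t *len)
{
	struct p_header80 h;

	memcpy(&h, header, sizeof(h));
	if (ntohl(h.magic) != DRBD_MAGIC)
		return -1;
	*cmd = ntohs(h.command);
	*len = ntohs(h.length);
	return 0;
}

int main(void)
{
	/* magic 0x83740267, command 3 (assumed P_BARRIER), length 8 */
	unsigned char wire[8] = { 0x83, 0x74, 0x02, 0x67, 0, 3, 0, 8 };
	uint16_t cmd;
	uint32_t len;

	if (decode_header80(wire, &cmd, &len) == 0)
		printf("cmd=%u len=%u\n", cmd, len);
	return 0;
}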
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001132static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001133{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001134 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001135 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001136
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001137 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001138 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001139 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001140
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001141 err = decode_header(connection, buffer, pi);
1142 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001144 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145}
1146
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001147static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001148{
1149 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001150 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001151 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001152
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001153 if (connection->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001154 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001155 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1156 struct drbd_device *device = peer_device->device;
1157
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001158 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001159 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001160 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001161 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001162
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001163 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001164 GFP_NOIO, NULL);
1165 if (rv) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001166 drbd_info(device, "local disk flush failed with status %d\n", rv);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001167 /* would rather check for EOPNOTSUPP, but that is not reliable.
1168 * don't try again for ANY return value != 0
1169 * if (rv == -EOPNOTSUPP) */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001170 drbd_bump_write_ordering(connection, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001171 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001172 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001173 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001174
1175 rcu_read_lock();
1176 if (rv)
1177 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001178 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001179 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001180 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181}
1182
1183/**
1184 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001185 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001186 * @epoch: Epoch object.
1187 * @ev: Epoch event.
1188 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001189static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190 struct drbd_epoch *epoch,
1191 enum epoch_event ev)
1192{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001193 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001194 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 enum finish_epoch rv = FE_STILL_LIVE;
1196
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001197 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001198 do {
1199 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001200
1201 epoch_size = atomic_read(&epoch->epoch_size);
1202
1203 switch (ev & ~EV_CLEANUP) {
1204 case EV_PUT:
1205 atomic_dec(&epoch->active);
1206 break;
1207 case EV_GOT_BARRIER_NR:
1208 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 break;
1210 case EV_BECAME_LAST:
1211 /* nothing to do */
1212 break;
1213 }
1214
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215 if (epoch_size != 0 &&
1216 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001217 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001218 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001219 spin_unlock(&connection->epoch_lock);
1220 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1221 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001223#if 0
1224 /* FIXME: dec unacked on connection, once we have
1225 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001226 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001227 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001228#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001230 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001231 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1232 list_del(&epoch->list);
1233 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001234 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001235 kfree(epoch);
1236
1237 if (rv == FE_STILL_LIVE)
1238 rv = FE_DESTROYED;
1239 } else {
1240 epoch->flags = 0;
1241 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001242 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001243 if (rv == FE_STILL_LIVE)
1244 rv = FE_RECYCLED;
1245 }
1246 }
1247
1248 if (!next_epoch)
1249 break;
1250
1251 epoch = next_epoch;
1252 } while (1);
1253
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001254 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256 return rv;
1257}
1258
1259/**
1260 * drbd_bump_write_ordering() - Fall back to another write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001261 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001262 * @wo: Write ordering method to try.
1263 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001264void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001265{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001266 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001267 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001268 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001269 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270 static char *write_ordering_str[] = {
1271 [WO_none] = "none",
1272 [WO_drain_io] = "drain",
1273 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001274 };
1275
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001276 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001277 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001278 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001279 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1280 struct drbd_device *device = peer_device->device;
1281
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001282 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001283 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001284 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001285
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001286 if (wo == WO_bdev_flush && !dc->disk_flushes)
1287 wo = WO_drain_io;
1288 if (wo == WO_drain_io && !dc->disk_drain)
1289 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001290 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001291 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001292 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001293 connection->write_ordering = wo;
1294 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001295 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001296}
1297
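Because the enum values are ordered weakest-first (WO_none < WO_drain_io < WO_bdev_flush), the min() above guarantees the write-ordering method can only ever degrade over the lifetime of the connection. A standalone sketch of that property, assuming nothing beyond the enum names copied from the code above; bump() and its flag parameters are made up for illustration.

#include <stdio.h>

enum write_ordering_e { WO_none, WO_drain_io, WO_bdev_flush };

static enum write_ordering_e bump(enum write_ordering_e cur,
				  enum write_ordering_e wanted,
				  int disk_flushes, int disk_drain)
{
	enum write_ordering_e wo = wanted < cur ? wanted : cur; /* min() */

	if (wo == WO_bdev_flush && !disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !disk_drain)
		wo = WO_none;
	return wo;
}

int main(void)
{
	/* a failed flush drops us to drain ... */
	enum write_ordering_e wo = bump(WO_bdev_flush, WO_drain_io, 1, 1);

	/* ... and asking for flush again cannot upgrade us back */
	wo = bump(wo, WO_bdev_flush, 1, 1);
	printf("%d\n", wo);	/* prints 1 == WO_drain_io */
	return 0;
}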
1298/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001299 * drbd_submit_peer_request() - submit a peer request's pages in one or more bios
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001300 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001301 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001302 * @rw: flag field, see bio->bi_rw
 * @fault_type: fault injection category, see drbd_insert_fault()
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001303 *
1304 * May spread the pages to multiple bios,
1305 * depending on bio_add_page restrictions.
1306 *
1307 * Returns 0 if all bios have been submitted,
1308 * -ENOMEM if we could not allocate enough bios,
1309 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1310 * single page to an empty bio (which should never happen and likely indicates
1311 * that the lower level IO stack is in some way broken). This has been observed
1312 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001313 */
1314/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001315int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001316 struct drbd_peer_request *peer_req,
1317 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001318{
1319 struct bio *bios = NULL;
1320 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001321 struct page *page = peer_req->pages;
1322 sector_t sector = peer_req->i.sector;
1323 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001324 unsigned n_bios = 0;
1325 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001326 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001327
1328 /* In most cases, we will only need one bio. But in case the lower
1329 * level restrictions happen to be different at this offset on this
1330 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001331 * request in more than one bio.
1332 *
1333 * Plain bio_alloc is good enough here; this is not a DRBD-internally
1334 * generated bio, but one allocated on behalf of the peer.
1335 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001336next_bio:
1337 bio = bio_alloc(GFP_NOIO, nr_pages);
1338 if (!bio) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001339 drbd_err(device, "submit_ee: Allocation of a bio failed\n");
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001340 goto fail;
1341 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001342 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001343 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001344 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001345 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001346 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001347 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001348
1349 bio->bi_next = bios;
1350 bios = bio;
1351 ++n_bios;
1352
1353 page_chain_for_each(page) {
1354 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1355 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001356 /* A single page must always be possible!
1357 * But in case it fails anyway,
1358 * we deal with it, and complain (below). */
1359 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001360 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001361 "bio_add_page failed for len=%u, "
1362 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001363 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001364 err = -ENOSPC;
1365 goto fail;
1366 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001367 goto next_bio;
1368 }
1369 ds -= len;
1370 sector += len >> 9;
1371 --nr_pages;
1372 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001373 D_ASSERT(device, page == NULL);
1374 D_ASSERT(device, ds == 0);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001375
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001376 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001377 do {
1378 bio = bios;
1379 bios = bios->bi_next;
1380 bio->bi_next = NULL;
1381
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001382 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001383 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001384 return 0;
1385
1386fail:
1387 while (bios) {
1388 bio = bios;
1389 bios = bios->bi_next;
1390 bio_put(bio);
1391 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001392 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001393}
1394
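The next_bio label above implements a simple splitting loop: try to add the current page to the current bio, and when bio_add_page() refuses (lower-level queue limits), open another bio and retry the same page. Below is a user-space model of just that control flow; MAX_VECS and fake_add_page() stand in for the real queue limit and kernel API, purely for illustration.

#include <stdio.h>

#define MAX_VECS 4

struct fake_bio { int vcnt; };

static int fake_add_page(struct fake_bio *bio)
{
	if (bio->vcnt >= MAX_VECS)
		return 0;	/* like bio_add_page(): 0 means "did not fit" */
	bio->vcnt++;
	return 1;
}

int main(void)
{
	struct fake_bio bios[8] = { { 0 } };
	int n_bios = 1, page;

	for (page = 0; page < 10; page++) {
		if (!fake_add_page(&bios[n_bios - 1])) {
			n_bios++;	/* goto next_bio; */
			page--;		/* retry the same page there */
		}
	}
	printf("10 pages -> %d bios\n", n_bios);	/* prints 3 */
	return 0;
}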
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001395static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001396 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001397{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001398 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001400 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001401 drbd_clear_interval(i);
1402
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001403 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001404 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001405 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001406}
1407
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001408static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001409{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001410 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001411 int vnr;
1412
1413 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001414 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1415 struct drbd_device *device = peer_device->device;
1416
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001417 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001418 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001419 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001420 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001421 rcu_read_lock();
1422 }
1423 rcu_read_unlock();
1424}
1425
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001426static struct drbd_peer_device *
1427conn_peer_device(struct drbd_connection *connection, int volume_number)
1428{
1429 return idr_find(&connection->peer_devices, volume_number);
1430}
1431
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001432static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001433{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001434 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001435 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001436 struct drbd_epoch *epoch;
1437
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001438 /* FIXME these are unacked on connection,
1439 * not a specific (peer)device.
1440 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001441 connection->current_epoch->barrier_nr = p->barrier;
1442 connection->current_epoch->connection = connection;
1443 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444
1445 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1446 * the activity log, which means it would not be resynced in case the
1447 * R_PRIMARY crashes now.
1448 * Therefore we must send the barrier_ack after the barrier request was
1449 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001450 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001451 case WO_none:
1452 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001453 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001454
1455 /* receiver context, in the writeout path of the other node.
1456 * avoid potential distributed deadlock */
1457 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1458 if (epoch)
1459 break;
1460 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001461 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001462 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463
1464 case WO_bdev_flush:
1465 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001466 conn_wait_active_ee_empty(connection);
1467 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001468
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001469 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001470 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1471 if (epoch)
1472 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 }
1474
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001475 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001476 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001477 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001478 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001479 }
1480
1481 epoch->flags = 0;
1482 atomic_set(&epoch->epoch_size, 0);
1483 atomic_set(&epoch->active, 0);
1484
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001485 spin_lock(&connection->epoch_lock);
1486 if (atomic_read(&connection->current_epoch->epoch_size)) {
1487 list_add(&epoch->list, &connection->current_epoch->list);
1488 connection->current_epoch = epoch;
1489 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490 } else {
1491 /* The current_epoch got recycled while we allocated this one... */
1492 kfree(epoch);
1493 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001494 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001496 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497}
1498
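receive_Barrier() below allocates the next epoch with GFP_NOIO outside the spinlock (the receiver sits in the peer's writeout path, so arbitrary reclaim could deadlock), then under the lock either chains it in or, if the current epoch was recycled in the meantime, simply frees it again. A compressed, runnable model of that allocate-then-maybe-discard pattern; the locking is reduced to comments and all names besides the epoch fields are made up for illustration.

#include <stdio.h>
#include <stdlib.h>

struct epoch {
	int size;		/* like atomic_read(&epoch->epoch_size) */
	struct epoch *next;
};

static struct epoch *current_epoch;

static void start_new_epoch(void)
{
	struct epoch *e = calloc(1, sizeof(*e));	/* outside the lock */

	if (!e)
		return;
	/* spin_lock(&epoch_lock); */
	if (current_epoch->size) {	/* writes still pending: chain it */
		e->next = current_epoch;
		current_epoch = e;
	} else {			/* recycled meanwhile: drop it */
		free(e);
	}
	/* spin_unlock(&epoch_lock); */
}

int main(void)
{
	struct epoch first = { .size = 3 };

	current_epoch = &first;
	start_new_epoch();
	printf("new epoch chained: %s\n",
	       current_epoch != &first ? "yes" : "no");
	return 0;
}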
1499/* used from receive_RSDataReply (recv_resync_read)
1500 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001501static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001502read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001503 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001504{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001505 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001506 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001507 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001508 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001509 int dgs, ds, err;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001510 void *dig_in = peer_device->connection->int_dig_in;
1511 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001512 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001514 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001515 if (peer_device->connection->peer_integrity_tfm) {
1516 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001517 /*
1518 * FIXME: Receive the incoming digest into the receive buffer
1519 * here, together with its struct p_data?
1520 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001521 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001522 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001524 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525 }
1526
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001527 if (!expect(IS_ALIGNED(data_size, 512)))
1528 return NULL;
1529 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1530 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531
Lars Ellenberg66660322010-04-06 12:15:04 +02001532 /* even though we trust our peer,
1533 * we sometimes have to double-check. */
1534 if (sector + (data_size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001535 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001536 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001537 (unsigned long long)capacity,
1538 (unsigned long long)sector, data_size);
1539 return NULL;
1540 }
1541
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1543 * "criss-cross" setup, that might cause write-out on some other DRBD,
1544 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001545 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001546 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001548
Lars Ellenberga73ff322012-06-25 19:15:38 +02001549 if (!data_size)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001550 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001551
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001553 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001554 page_chain_for_each(page) {
1555 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001556 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001557 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001558 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001559 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001560 data[0] = data[0] ^ (unsigned long)-1;
1561 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001563 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001564 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001565 return NULL;
1566 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001567 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568 }
1569
1570 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001571 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001572 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001573 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001574 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001575 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576 return NULL;
1577 }
1578 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001579 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001580 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581}
1582
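The integrity scheme read_in_block() implements is simply digest || payload on the wire: receive the dgs digest bytes, receive the payload, recompute over what actually arrived, and memcmp() the two. A runnable model of only that flow; the XOR "digest" is a stand-in for the negotiated peer_integrity_tfm, not the real crypto API.

#include <stdio.h>
#include <string.h>

static unsigned char toy_digest(const unsigned char *buf, size_t len)
{
	unsigned char d = 0;

	while (len--)
		d ^= *buf++;
	return d;
}

static int verify(const unsigned char *dig_in, size_t dgs,
		  const unsigned char *payload, size_t len)
{
	unsigned char dig_vv[1] = { toy_digest(payload, len) };

	return memcmp(dig_in, dig_vv, dgs) == 0;	/* as in the code above */
}

int main(void)
{
	unsigned char payload[4] = { 1, 2, 3, 4 };
	unsigned char dig[1] = { 1 ^ 2 ^ 3 ^ 4 };	/* what the peer sent */

	printf("%s\n", verify(dig, 1, payload, 4) ? "ok" : "FAILED");
	return 0;
}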
1583/* drbd_drain_block() just takes a data block
1584 * out of the socket input buffer, and discards it.
1585 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001586static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001587{
1588 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001589 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001590 void *data;
1591
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001592 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001593 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001594
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001595 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596
1597 data = kmap(page);
1598 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001599 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1600
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001601 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001602 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001603 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001604 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605 }
1606 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001607 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001608 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609}
1610
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001611static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001612 sector_t sector, int data_size)
1613{
Kent Overstreet79886132013-11-23 17:19:00 -08001614 struct bio_vec bvec;
1615 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001616 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001617 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001618 void *dig_in = peer_device->connection->int_dig_in;
1619 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001621 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001622 if (peer_device->connection->peer_integrity_tfm) {
1623 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1624 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001625 if (err)
1626 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001627 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628 }
1629
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630 /* optimistically update recv_cnt. if receiving fails below,
1631 * we disconnect anyway, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001632 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001633
1634 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001635 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636
Kent Overstreet79886132013-11-23 17:19:00 -08001637 bio_for_each_segment(bvec, bio, iter) {
1638 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1639 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001640 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001641 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001642 if (err)
1643 return err;
1644 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645 }
1646
1647 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001648 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001650 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001651 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652 }
1653 }
1654
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001655 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001656 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657}
1658
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001659/*
1660 * e_end_resync_block() is called in asender context via
1661 * drbd_finish_peer_reqs().
1662 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001663static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001665 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001666 container_of(w, struct drbd_peer_request, w);
1667 struct drbd_peer_device *peer_device = peer_req->peer_device;
1668 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001669 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001670 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001671
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001672 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001673
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001674 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001675 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001676 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677 } else {
1678 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001679 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001681 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001683 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001685 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686}
1687
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001688static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
1689 int data_size) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001691 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001692 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001693
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001694 peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001695 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001696 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001697
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001698 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001700 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701 /* corresponding dec_unacked() in e_end_resync_block()
1702 * respective _drbd_clear_done_ee */
1703
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001704 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001705
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001706 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001707 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001708 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001710 atomic_add(data_size >> 9, &device->rs_sect_ev);
1711 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001712 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001714 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001715 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001716 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001717 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001718 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001719
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001720 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001721fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001722 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001723 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001724}
1725
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001726static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001727find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001728 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729{
1730 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001731
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001732 /* Request object according to our peer */
1733 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001734 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001735 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001736 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001737 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001738 (unsigned long)id, (unsigned long long)sector);
1739 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001740 return NULL;
1741}
1742
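find_request() leans on the block_id being nothing but the kernel address of our own request object, echoed back by the peer: we cast it straight back to a pointer, but only trust it once drbd_contains_interval() confirms that object is still registered for that sector. A user-space sketch of the round trip, with a linear registry scan standing in for the interval tree; all names here are invented for illustration.

#include <stdint.h>
#include <stdio.h>

struct request { uint64_t sector; };

static struct request *registry[16];

static struct request *find_request(uint64_t id, uint64_t sector)
{
	struct request *req = (struct request *)(uintptr_t)id;
	int i;

	for (i = 0; i < 16; i++)
		if (registry[i] == req && req->sector == sector)
			return req;	/* validated, safe to dereference */
	return NULL;			/* stale or forged id */
}

int main(void)
{
	struct request req = { .sector = 128 };

	registry[0] = &req;	/* "sent" to the peer as an opaque id */
	printf("%s\n", find_request((uintptr_t)&req, 128) ? "found" : "missing");
	return 0;
}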
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001743static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001744{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001745 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001746 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001747 struct drbd_request *req;
1748 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001749 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001750 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001751
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001752 peer_device = conn_peer_device(connection, pi->vnr);
1753 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001754 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001755 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756
1757 sector = be64_to_cpu(p->sector);
1758
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001759 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001760 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001761 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001762 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001763 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001764
Bart Van Assche24c48302011-05-21 18:32:29 +02001765 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766 * special casing it there for the various failure cases.
1767 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001768 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001769 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001770 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001771 /* else: nothing. handled from drbd_disconnect...
1772 * we should not complete this just yet
1773 * in case we are "on-disconnect: freeze" */
1774
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001775 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776}
1777
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001778static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001780 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001781 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001782 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001783 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001784 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001785
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001786 peer_device = conn_peer_device(connection, pi->vnr);
1787 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001788 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001789 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001790
1791 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001792 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001794 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 /* data is submitted to disk within recv_resync_read.
1796 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001797 * or in drbd_peer_request_endio. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001798 err = recv_resync_read(peer_device, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799 } else {
1800 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001801 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001802
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001803 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001805 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 }
1807
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001808 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001809
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001810 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811}
1812
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001813static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001814 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001816 struct drbd_interval *i;
1817 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001818
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001819 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001820 if (!i->local)
1821 continue;
1822 req = container_of(i, struct drbd_request, i);
1823 if (req->rq_state & RQ_LOCAL_PENDING ||
1824 !(req->rq_state & RQ_POSTPONED))
1825 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001826 /* as it is RQ_POSTPONED, this will cause it to
1827 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001828 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001829 }
1830}
1831
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001832/*
1833 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001834 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001835static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001836{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001837 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001838 container_of(w, struct drbd_peer_request, w);
1839 struct drbd_peer_device *peer_device = peer_req->peer_device;
1840 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001841 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001842 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843
Philipp Reisner303d1442011-04-13 16:24:47 -07001844 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001845 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001846 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1847 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001848 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001849 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001850 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001852 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001853 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001854 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 /* we expect it to be marked out of sync anyway...
1856 * maybe assert this? */
1857 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001858 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001859 }
1860 /* we delete from the conflict detection hash _after_ we sent out the
1861 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001862 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001863 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001864 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001865 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001866 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001867 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001868 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001869 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001870 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001872 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001874 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001875}
1876
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001877static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001878{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001879 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001880 container_of(w, struct drbd_peer_request, w);
1881 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001882 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001883
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001884 err = drbd_send_ack(peer_device, ack, peer_req);
1885 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001886
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001887 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888}
1889
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001890static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001891{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001892 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001893}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001894
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001895static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001896{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001897 struct drbd_peer_request *peer_req =
1898 container_of(w, struct drbd_peer_request, w);
1899 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001900
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001901 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001902 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001903}
1904
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001905static bool seq_greater(u32 a, u32 b)
1906{
1907 /*
1908 * We assume 32-bit wrap-around here.
1909 * For 24-bit wrap-around, we would have to shift:
1910 * a <<= 8; b <<= 8;
1911 */
1912 return (s32)a - (s32)b > 0;
1913}
1914
1915static u32 seq_max(u32 a, u32 b)
1916{
1917 return seq_greater(a, b) ? a : b;
1918}
1919
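The signed-difference trick in seq_greater() keeps the ordering correct across a 32-bit wrap. The standalone check below uses the equivalent (s32)(a - b) form, which is the same on two's-complement machines but sidesteps the signed-overflow corner case that the kernel build disables anyway (-fno-strict-overflow).

#include <stdint.h>
#include <stdio.h>

static int seq_greater(uint32_t a, uint32_t b)
{
	/* equivalent to (s32)a - (s32)b > 0 above, two's complement assumed */
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	/* 1 is "after" 0xffffffff: the counter wrapped */
	printf("%d\n", seq_greater(1, 0xffffffffu));	/* 1 */
	/* but not "after" 0x7fffffff: half the window away */
	printf("%d\n", seq_greater(1, 0x7fffffffu));	/* 0 */
	return 0;
}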
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001920static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001921{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001922 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001923 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001924
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001925 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001926 spin_lock(&device->peer_seq_lock);
1927 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1928 device->peer_seq = newest_peer_seq;
1929 spin_unlock(&device->peer_seq_lock);
1930 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001931 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001932 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001933 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001934}
1935
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001936static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1937{
1938 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1939}
1940
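Note the mixed units in overlaps(): starts are in 512-byte sectors, lengths in bytes, hence the l>>9. Two half-open sector ranges intersect iff neither ends at or before the other's start, which the quick standalone check below confirms at the boundary.

#include <stdio.h>

typedef unsigned long long sector_t;	/* stand-in for the kernel type */

static int overlaps(sector_t s1, int l1, sector_t s2, int l2)
{
	return !((s1 + (l1 >> 9) <= s2) || (s1 >= s2 + (l2 >> 9)));
}

int main(void)
{
	/* 4 KiB at sector 0 covers sectors 0..7, so sector 7 overlaps ... */
	printf("%d\n", overlaps(0, 4096, 7, 512));	/* 1 */
	/* ... and sector 8 does not: the ranges only touch */
	printf("%d\n", overlaps(0, 4096, 8, 512));	/* 0 */
	return 0;
}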
1941/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001942static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001943{
1944 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001945 bool rv = false;
1946
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001947 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001948 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001949 if (overlaps(peer_req->i.sector, peer_req->i.size,
1950 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001951 rv = true;
1952 break;
1953 }
1954 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001955 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001956
1957 return rv;
1958}
1959
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960/* Called from receive_Data.
1961 * Synchronize packets on sock with packets on msock.
1962 *
1963 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1964 * packet traveling on msock, they are still processed in the order they have
1965 * been sent.
1966 *
1967 * Note: we don't care for Ack packets overtaking P_DATA packets.
1968 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001969 * In case packet_seq is larger than device->peer_seq, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001970 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001971 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 * ourselves. Correctly handles 32bit wrap around.
1973 *
 1974 * Assume we have a 10 GBit connection, that is about 1<<30 bytes per second,
 1975 * or about 1<<21 sectors per second. So in the "worst" case, we have 1<<3 == 8
 1976 * seconds for the 24bit wrap (historical atomic_t guarantee on some archs),
 1977 * and 1<<11 == 2048 seconds, aka ages, for the 32bit wrap around...
1978 *
1979 * returns 0 if we may process the packet,
1980 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001981static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001983 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001984 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001985 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001986 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001987
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001988 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001989 return 0;
1990
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001991 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001992 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001993 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
1994 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001995 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001996 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001997
Philipp Reisnerb411b362009-09-25 16:07:19 -07001998 if (signal_pending(current)) {
1999 ret = -ERESTARTSYS;
2000 break;
2001 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002002
2003 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002004 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002005 rcu_read_unlock();
2006
2007 if (!tp)
2008 break;
2009
2010 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002011 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2012 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002013 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002014 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002015 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002016 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002017 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002018 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002020 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 break;
2022 }
2023 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002024 spin_unlock(&device->peer_seq_lock);
2025 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 return ret;
2027}
2028
Lars Ellenberg688593c2010-11-17 22:25:03 +01002029/* see also bio_flags_to_wire()
 2030 * We need to semantically map REQ_* bio flags to DP_* data packet flags
 2031 * and back, because we may replicate to peers on other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002032static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002033{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002034 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2035 (dpf & DP_FUA ? REQ_FUA : 0) |
2036 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2037 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002038}
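/*
 * Worked example (illustration only): dp_flags = DP_RW_SYNC | DP_FUA as
 * received from the peer maps to REQ_SYNC | REQ_FUA for the locally
 * submitted bio, preserving the ordering/durability semantics of the
 * original write; bio_flags_to_wire() performs the inverse mapping on
 * the sending side.
 */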
2039
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002040static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002041 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002042{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002043 struct drbd_interval *i;
2044
2045 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002046 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002047 struct drbd_request *req;
2048 struct bio_and_error m;
2049
2050 if (!i->local)
2051 continue;
2052 req = container_of(i, struct drbd_request, i);
2053 if (!(req->rq_state & RQ_POSTPONED))
2054 continue;
2055 req->rq_state &= ~RQ_POSTPONED;
2056 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002057 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002058 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002059 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002060 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002061 goto repeat;
2062 }
2063}
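/*
 * Note (illustration only): complete_master_bio() is called with
 * req_lock dropped, and once the lock has been released the interval
 * tree may have changed under us, so the overlap walk is restarted from
 * the top via "goto repeat".
 */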
2064
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002065static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002066 struct drbd_peer_request *peer_req)
2067{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002068 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002069 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002070 sector_t sector = peer_req->i.sector;
2071 const unsigned int size = peer_req->i.size;
2072 struct drbd_interval *i;
2073 bool equal;
2074 int err;
2075
2076 /*
2077 * Inserting the peer request into the write_requests tree will prevent
2078 * new conflicting local requests from being added.
2079 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002080 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002081
2082 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002083 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002084 if (i == &peer_req->i)
2085 continue;
2086
2087 if (!i->local) {
2088 /*
2089 * Our peer has sent a conflicting remote request; this
2090 * should not happen in a two-node setup. Wait for the
2091 * earlier peer request to complete.
2092 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002093 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002094 if (err)
2095 goto out;
2096 goto repeat;
2097 }
2098
2099 equal = i->sector == sector && i->size == size;
2100 if (resolve_conflicts) {
2101 /*
2102 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002103 * overlapping request, it can be considered overwritten
2104 * and thus superseded; otherwise, it will be retried
2105 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002106 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002107 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002108 (i->size >> 9) >= sector + (size >> 9);
2109
2110 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002111 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002112 "local=%llus +%u, remote=%llus +%u, "
2113 "assuming %s came first\n",
2114 (unsigned long long)i->sector, i->size,
2115 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002116 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002117
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002118 inc_unacked(device);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002119 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002120 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002121 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002122 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002123
2124 err = -ENOENT;
2125 goto out;
2126 } else {
2127 struct drbd_request *req =
2128 container_of(i, struct drbd_request, i);
2129
2130 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002131 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002132 "local=%llus +%u, remote=%llus +%u\n",
2133 (unsigned long long)i->sector, i->size,
2134 (unsigned long long)sector, size);
2135
2136 if (req->rq_state & RQ_LOCAL_PENDING ||
2137 !(req->rq_state & RQ_POSTPONED)) {
2138 /*
2139 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002140 * decide if this request has been superseded
2141 * or needs to be retried.
2142 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002143 * disappear from the write_requests tree.
2144 *
2145 * In addition, wait for the conflicting
2146 * request to finish locally before submitting
2147 * the conflicting peer request.
2148 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002149 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002150 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002151 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002152 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002153 goto out;
2154 }
2155 goto repeat;
2156 }
2157 /*
2158 * Remember to restart the conflicting requests after
2159 * the new peer request has completed.
2160 */
2161 peer_req->flags |= EE_RESTART_REQUESTS;
2162 }
2163 }
2164 err = 0;
2165
2166 out:
2167 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002168 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002169 return err;
2170}
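/*
 * Worked example (illustration only) of the containment test above: a
 * local request covering sectors [0, 16) fully contains a peer request
 * covering [4, 12), since 0 <= 4 and 0 + 16 >= 4 + 8; with
 * resolve_conflicts set, the peer request is marked superseded.  A peer
 * request covering [12, 24) only partially overlaps and is answered
 * with e_send_retry_write instead.
 */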
2171
Philipp Reisnerb411b362009-09-25 16:07:19 -07002172/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002173static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002175 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002176 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002177 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002178 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002179 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002180 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181 int rw = WRITE;
2182 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002183 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002184
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002185 peer_device = conn_peer_device(connection, pi->vnr);
2186 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002187 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002188 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002190 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002191 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002192
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002193 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2194 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002195 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002196 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002197 if (!err)
2198 err = err2;
2199 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002200 }
2201
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002202 /*
2203 * Corresponding put_ldev done either below (on various errors), or in
2204 * drbd_peer_request_endio, if we successfully submit the data at the
2205 * end of this function.
2206 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002207
2208 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002209 peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002210 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002211 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002212 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213 }
2214
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002215 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002216
Lars Ellenberg688593c2010-11-17 22:25:03 +01002217 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002218 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002219 if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002220 D_ASSERT(device, peer_req->i.size == 0);
2221 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002222 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002223
2224 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002225 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002226
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002227 spin_lock(&connection->epoch_lock);
2228 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002229 atomic_inc(&peer_req->epoch->epoch_size);
2230 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002231 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002232
Philipp Reisner302bdea2011-04-21 11:36:49 +02002233 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002234 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002235 rcu_read_unlock();
2236 if (tp) {
2237 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002238 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002239 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002241 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002242 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002243 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002244 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002245 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002246 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002247 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002249 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002251 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002252 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002253 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002254 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002255 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002256 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002257
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002258 if (device->state.conn == C_SYNC_TARGET)
2259 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002260
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002261 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002262 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002263 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002264 case DRBD_PROT_C:
2265 dp_flags |= DP_SEND_WRITE_ACK;
2266 break;
2267 case DRBD_PROT_B:
2268 dp_flags |= DP_SEND_RECEIVE_ACK;
2269 break;
2270 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002271 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002272 }
2273
2274 if (dp_flags & DP_SEND_WRITE_ACK) {
2275 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002276 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002277 /* corresponding dec_unacked() in e_end_block()
2278 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002279 }
2280
2281 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282 /* I really don't like it that the receiver thread
 2283	 * sends on the msock, but anyway */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002284 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 }
2286
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002287 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002289 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002290 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2291 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002292 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293 }
2294
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002295 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002296 if (!err)
2297 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002298
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002299 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002300 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002301 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002302 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002303 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002304 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002305 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002306 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002307
Philipp Reisnerb411b362009-09-25 16:07:19 -07002308out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002309 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002310 put_ldev(device);
2311 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002312 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002313}
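/*
 * Protocol note (illustration only): the dp_flags handling above mirrors
 * DRBD's replication modes.  Protocol C sets DP_SEND_WRITE_ACK, so
 * P_WRITE_ACK is only sent from e_end_block() once the local disk write
 * has completed; protocol B sets DP_SEND_RECEIVE_ACK and is answered
 * with P_RECV_ACK as soon as the data has been received; protocol A
 * sends no per-request ack at all.
 */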
2314
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002315/* We may throttle resync, if the lower device seems to be busy,
2316 * and current sync rate is above c_min_rate.
2317 *
2318 * To decide whether or not the lower device is busy, we use a scheme similar
 2319 * to MD RAID's is_mddev_idle(): if the partition stats reveal "significant"
 2320 * activity (more than 64 sectors) that we cannot account for with our own
 2321 * resync activity, the device obviously is "busy".
2322 *
2323 * The current sync rate used here uses only the most recent two step marks,
2324 * to have a short time average so we can react faster.
2325 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002326int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002327{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002328 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002329 unsigned long db, dt, dbdt;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002330 struct lc_element *tmp;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002331 int curr_events;
2332 int throttle = 0;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002333 unsigned int c_min_rate;
2334
2335 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002336 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002337 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002338
2339 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002340 if (c_min_rate == 0)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002341 return 0;
2342
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002343 spin_lock_irq(&device->al_lock);
2344 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
Philipp Reisnere3555d82010-11-07 15:56:29 +01002345 if (tmp) {
2346 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2347 if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002348 spin_unlock_irq(&device->al_lock);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002349 return 0;
2350 }
2351 /* Do not slow down if app IO is already waiting for this extent */
2352 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002353 spin_unlock_irq(&device->al_lock);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002354
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002355 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2356 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002357 atomic_read(&device->rs_sect_ev);
Philipp Reisnere3555d82010-11-07 15:56:29 +01002358
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002359 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002360 unsigned long rs_left;
2361 int i;
2362
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002363 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002364
2365 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2366 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002367 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002368
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002369 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2370 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002371 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002372 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002373
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002374 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002375 if (!dt)
2376 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002377 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002378 dbdt = Bit2KB(db/dt);
2379
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002380 if (dbdt > c_min_rate)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002381 throttle = 1;
2382 }
2383 return throttle;
2384}
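/*
 * Worked example (illustration only): suppose the two most recent sync
 * marks are dt == 3 seconds apart and the bitmap lost db == 1500
 * out-of-sync bits in between.  One bitmap bit covers 4 KiB, so
 * dbdt = Bit2KB(1500 / 3) == 2000 KiB/s.  With c_min_rate configured to,
 * say, 250 KiB/s, and more than 64 sectors of backing device activity we
 * cannot attribute to our own resync, this returns 1 and the resync
 * request is throttled.
 */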
2385
2386
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002387static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002388{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002389 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002390 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002391 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002392 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002393 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002394 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002395 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002396 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002397 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002398
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002399 peer_device = conn_peer_device(connection, pi->vnr);
2400 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002401 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002402 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002403 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002404
2405 sector = be64_to_cpu(p->sector);
2406 size = be32_to_cpu(p->blksize);
2407
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002408 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002409 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002410 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002411 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002412 }
2413 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002414 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002415 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002416 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002417 }
2418
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002419 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002420 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002421 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002422 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002423 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002424 break;
2425 case P_RS_DATA_REQUEST:
2426 case P_CSUM_RS_REQUEST:
2427 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002428 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002429 break;
2430 case P_OV_REPLY:
2431 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002432 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002433 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002434 break;
2435 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002436 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002437 }
2438 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002439 drbd_err(device, "Can not satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002440 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002441
Lars Ellenberga821cc42010-09-06 12:31:37 +02002442	/* drain any possible payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002443 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002444 }
2445
2446 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2447 * "criss-cross" setup, that might cause write-out on some other DRBD,
2448 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002449 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002450 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002451 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002452 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002453 }
2454
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002455 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002456 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002457 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002459 /* application IO, don't drbd_rs_begin_io */
2460 goto submit;
2461
Philipp Reisnerb411b362009-09-25 16:07:19 -07002462 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002463 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002464 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002465 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002466 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467 break;
2468
2469 case P_OV_REPLY:
2470 case P_CSUM_RS_REQUEST:
2471 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002472 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473 if (!di)
2474 goto out_free_e;
2475
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002476 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002477 di->digest = (((char *)di)+sizeof(struct digest_info));
2478
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002479 peer_req->digest = di;
2480 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002481
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002482 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002483 goto out_free_e;
2484
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002485 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002486 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002487 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002488 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002489 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002490 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002491 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002492 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002493 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002494 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002495 /* drbd_rs_begin_io done when we sent this request,
2496 * but accounting still needs to be done. */
2497 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002498 }
2499 break;
2500
2501 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002502 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002503 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002504 unsigned long now = jiffies;
2505 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002506 device->ov_start_sector = sector;
2507 device->ov_position = sector;
2508 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2509 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002510 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002511 device->rs_mark_left[i] = device->ov_left;
2512 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002513 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002514 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002515 (unsigned long long)sector);
2516 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002517 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002518 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002519 break;
2520
Philipp Reisnerb411b362009-09-25 16:07:19 -07002521 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002522 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002523 }
2524
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002525 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2526 * wrt the receiver, but it is not as straightforward as it may seem.
2527 * Various places in the resync start and stop logic assume resync
2528 * requests are processed in order, requeuing this on the worker thread
2529 * introduces a bunch of new code for synchronization between threads.
2530 *
2531 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2532 * "forever", throttling after drbd_rs_begin_io will lock that extent
2533 * for application writes for the same time. For now, just throttle
2534 * here, where the rest of the code expects the receiver to sleep for
2535 * a while, anyways.
2536 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002537
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002538 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2539 * this defers syncer requests for some time, before letting at least
 2540 * one request through. The resync controller on the receiving side
2541 * will adapt to the incoming rate accordingly.
2542 *
2543 * We cannot throttle here if remote is Primary/SyncTarget:
2544 * we would also throttle its application reads.
2545 * In that case, throttling is done on the SyncTarget only.
2546 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002547 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002548 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002549 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002550 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002551
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002552submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002553 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002554
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002555submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002556 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002557 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002558 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002559 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002560
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002561 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002562 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002564 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002565 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002566 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002567 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002568 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002569 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2570
Philipp Reisnerb411b362009-09-25 16:07:19 -07002571out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002572 put_ldev(device);
2573 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002574 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002575}
2576
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002577/**
2578 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2579 */
2580static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002581{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002582 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002583 int self, peer, rv = -100;
2584 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002585 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002586
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002587 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2588 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002589
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002590 ch_peer = device->p_uuid[UI_SIZE];
2591 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002592
Philipp Reisner44ed1672011-04-19 17:10:19 +02002593 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002594 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002595 rcu_read_unlock();
2596 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002597 case ASB_CONSENSUS:
2598 case ASB_DISCARD_SECONDARY:
2599 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002600 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002601 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002602 break;
2603 case ASB_DISCONNECT:
2604 break;
2605 case ASB_DISCARD_YOUNGER_PRI:
2606 if (self == 0 && peer == 1) {
2607 rv = -1;
2608 break;
2609 }
2610 if (self == 1 && peer == 0) {
2611 rv = 1;
2612 break;
2613 }
2614 /* Else fall through to one of the other strategies... */
2615 case ASB_DISCARD_OLDER_PRI:
2616 if (self == 0 && peer == 1) {
2617 rv = 1;
2618 break;
2619 }
2620 if (self == 1 && peer == 0) {
2621 rv = -1;
2622 break;
2623 }
2624 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002625 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002626 "Using discard-least-changes instead\n");
2627 case ASB_DISCARD_ZERO_CHG:
2628 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002629 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002630 ? -1 : 1;
2631 break;
2632 } else {
2633 if (ch_peer == 0) { rv = 1; break; }
2634 if (ch_self == 0) { rv = -1; break; }
2635 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002636 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002637 break;
2638 case ASB_DISCARD_LEAST_CHG:
2639 if (ch_self < ch_peer)
2640 rv = -1;
2641 else if (ch_self > ch_peer)
2642 rv = 1;
2643 else /* ( ch_self == ch_peer ) */
2644 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002645 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002646 ? -1 : 1;
2647 break;
2648 case ASB_DISCARD_LOCAL:
2649 rv = -1;
2650 break;
2651 case ASB_DISCARD_REMOTE:
2652 rv = 1;
2653 }
2654
2655 return rv;
2656}
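/*
 * Worked example (illustration only): with after-sb-0pri set to
 * discard-least-changes, ch_self == 10 and ch_peer == 42, we modified
 * fewer blocks than the peer, so rv == -1 and we become sync target,
 * adopting the peer's version of the data.  On an exact tie, the
 * RESOLVE_CONFLICTS flag breaks the decision deterministically, so the
 * two nodes arrive at complementary results.
 */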
2657
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002658/**
2659 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2660 */
2661static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002662{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002663 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002664 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002665 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002666
Philipp Reisner44ed1672011-04-19 17:10:19 +02002667 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002668 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002669 rcu_read_unlock();
2670 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671 case ASB_DISCARD_YOUNGER_PRI:
2672 case ASB_DISCARD_OLDER_PRI:
2673 case ASB_DISCARD_LEAST_CHG:
2674 case ASB_DISCARD_LOCAL:
2675 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002676 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002677 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678 break;
2679 case ASB_DISCONNECT:
2680 break;
2681 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002682 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002683 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002684 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002685 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002686 rv = hg;
2687 break;
2688 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002689 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002690 break;
2691 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002692 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002693 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002694 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002695 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002696 enum drbd_state_rv rv2;
2697
Philipp Reisnerb411b362009-09-25 16:07:19 -07002698 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2699 * we might be here in C_WF_REPORT_PARAMS which is transient.
2700 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002701 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002702 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002703 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002704 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002705 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002706 rv = hg;
2707 }
2708 } else
2709 rv = hg;
2710 }
2711
2712 return rv;
2713}
2714
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002715/**
2716 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2717 */
2718static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002719{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002720 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002721 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002722 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002723
Philipp Reisner44ed1672011-04-19 17:10:19 +02002724 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002725 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002726 rcu_read_unlock();
2727 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728 case ASB_DISCARD_YOUNGER_PRI:
2729 case ASB_DISCARD_OLDER_PRI:
2730 case ASB_DISCARD_LEAST_CHG:
2731 case ASB_DISCARD_LOCAL:
2732 case ASB_DISCARD_REMOTE:
2733 case ASB_CONSENSUS:
2734 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002735 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002736 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002737 break;
2738 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002739 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740 break;
2741 case ASB_DISCONNECT:
2742 break;
2743 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002744 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002746 enum drbd_state_rv rv2;
2747
Philipp Reisnerb411b362009-09-25 16:07:19 -07002748 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2749 * we might be here in C_WF_REPORT_PARAMS which is transient.
2750 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002751 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002752 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002753 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002754 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002755 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 rv = hg;
2757 }
2758 } else
2759 rv = hg;
2760 }
2761
2762 return rv;
2763}
2764
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002765static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 u64 bits, u64 flags)
2767{
2768 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002769 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770 return;
2771 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002772 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002773 text,
2774 (unsigned long long)uuid[UI_CURRENT],
2775 (unsigned long long)uuid[UI_BITMAP],
2776 (unsigned long long)uuid[UI_HISTORY_START],
2777 (unsigned long long)uuid[UI_HISTORY_END],
2778 (unsigned long long)bits,
2779 (unsigned long long)flags);
2780}
2781
2782/*
2783 100 after split brain try auto recover
2784 2 C_SYNC_SOURCE set BitMap
2785 1 C_SYNC_SOURCE use BitMap
2786 0 no Sync
2787 -1 C_SYNC_TARGET use BitMap
2788 -2 C_SYNC_TARGET set BitMap
2789 -100 after split brain, disconnect
2790-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002791-1091 requires proto 91
2792-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002793 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002794static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795{
2796 u64 self, peer;
2797 int i, j;
2798
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002799 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2800 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002801
2802 *rule_nr = 10;
2803 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2804 return 0;
2805
2806 *rule_nr = 20;
2807 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2808 peer != UUID_JUST_CREATED)
2809 return -2;
2810
2811 *rule_nr = 30;
2812 if (self != UUID_JUST_CREATED &&
2813 (peer == UUID_JUST_CREATED || peer == (u64)0))
2814 return 2;
2815
2816 if (self == peer) {
2817 int rct, dc; /* roles at crash time */
2818
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002819 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002821 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002822 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002824 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2825 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002826 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002827 drbd_uuid_move_history(device);
2828 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2829 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002830
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002831 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2832 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002833 *rule_nr = 34;
2834 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002835 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836 *rule_nr = 36;
2837 }
2838
2839 return 1;
2840 }
2841
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002842 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002844 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002845 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002846
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002847 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2848 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002849 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002850
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002851 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2852 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2853 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002854
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002855 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002856 *rule_nr = 35;
2857 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002858 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002859 *rule_nr = 37;
2860 }
2861
2862 return -1;
2863 }
2864
2865 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002866 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2867 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868 /* lowest bit is set when we were primary,
2869 * next bit (weight 2) is set when peer was primary */
2870 *rule_nr = 40;
2871
2872 switch (rct) {
2873 case 0: /* !self_pri && !peer_pri */ return 0;
2874 case 1: /* self_pri && !peer_pri */ return 1;
2875 case 2: /* !self_pri && peer_pri */ return -1;
2876 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002877 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002878 return dc ? -1 : 1;
2879 }
2880 }
2881
2882 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002883 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 if (self == peer)
2885 return -1;
2886
2887 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002888 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002890 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002891 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2892 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2893 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894			/* The last P_SYNC_UUID did not get through. Undo the
 2895			   modifications the peer made to its UUIDs at the last
 2895			   start of resync as sync source. */
2896
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002897 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002898 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002900 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2901 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002902
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002903 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002904 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002905
Philipp Reisnerb411b362009-09-25 16:07:19 -07002906 return -1;
2907 }
2908 }
2909
2910 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002911 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002912 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002913 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914 if (self == peer)
2915 return -2;
2916 }
2917
2918 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002919 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2920 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002921 if (self == peer)
2922 return 1;
2923
2924 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002925 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002926 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002927 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002928 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2929 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2930 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002931		/* The last P_SYNC_UUID did not get through. Undo the
 2932		   modifications we made to our own UUIDs at the last start
 2932		   of resync as sync source. */
2933
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002934 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002935 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002936
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002937 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2938 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002939
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002940 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002941 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2942 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002943
2944 return 1;
2945 }
2946 }
2947
2949 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002950 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002951 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002952 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953 if (self == peer)
2954 return 2;
2955 }
2956
2957 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002958 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2959 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002960 if (self == peer && self != ((u64)0))
2961 return 100;
2962
2963 *rule_nr = 100;
2964 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002965 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002966 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002967 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002968 if (self == peer)
2969 return -100;
2970 }
2971 }
2972
2973 return -1000;
2974}
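/*
 * Rough summary of the return convention above (a sketch of the logic in
 * this file, not separate documentation): 0 means no resync is needed,
 * 1/-1 mean bitmap-based resync as source/target, 2/-2 mean full resync
 * as source/target, 100/-100 mean split brain, -1000 means unrelated
 * data, and values below -1000 (e.g. -1091) mean "both sides need at
 * least protocol (-hg - 1000)". *rule_nr records which rule fired, for
 * the log line in drbd_sync_handshake().
 */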
2975
2976/* drbd_sync_handshake() returns the new conn state on success, or
2977   C_MASK (-1) on failure.
2978 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002979static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
2980 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002981 enum drbd_disk_state peer_disk) __must_hold(local)
2982{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002983 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984 enum drbd_conns rv = C_MASK;
2985 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002986 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002987 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002988
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002989 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002990 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002991 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002992
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002993 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002994
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002995 spin_lock_irq(&device->ldev->md.uuid_lock);
2996 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
2997 drbd_uuid_dump(device, "peer", device->p_uuid,
2998 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002999
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003000 hg = drbd_uuid_compare(device, &rule_nr);
3001 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003002
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003003 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003004
3005 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003006 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003007 return C_MASK;
3008 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003009 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003010 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011 return C_MASK;
3012 }
3013
3014 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3015 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3016 int f = (hg == -100) || abs(hg) == 2;
3017 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3018 if (f)
3019 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003020 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003021 hg > 0 ? "source" : "target");
3022 }
3023
Adam Gandelman3a11a482010-04-08 16:48:23 -07003024 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003025 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003026
Philipp Reisner44ed1672011-04-19 17:10:19 +02003027 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003028 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003029
3030 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003031 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003032 + (peer_role == R_PRIMARY);
3033 int forced = (hg == -100);
3034
3035 switch (pcount) {
3036 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003037 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003038 break;
3039 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003040 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003041 break;
3042 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003043 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044 break;
3045 }
3046 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003047 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003048 "automatically solved. Sync from %s node\n",
3049 pcount, (hg < 0) ? "peer" : "this");
3050 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003051 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003052				  " UUIDs were ambiguous.\n");
3053 hg = hg*2;
3054 }
3055 }
3056 }
3057
3058 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003059 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003060 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003061 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062 hg = 1;
3063
3064 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003065 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066 "Sync from %s node\n",
3067 (hg < 0) ? "peer" : "this");
3068 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003069 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003070 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003071 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072
3073 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003074 /* FIXME this log message is not correct if we end up here
3075 * after an attempted attach on a diskless node.
3076 * We just refuse to attach -- well, we drop the "connection"
3077 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003078 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003079 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003080 return C_MASK;
3081 }
3082
3083 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003084 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003085 return C_MASK;
3086 }
3087
3088 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003089 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003090 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003091 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003092 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093 /* fall through */
3094 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003095 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096 return C_MASK;
3097 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003098 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003099			     " assumption\n");
3100 }
3101 }
3102
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003103 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003104 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003105 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003106 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003107			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003108 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3109 abs(hg) >= 2 ? "full" : "bit-map based");
3110 return C_MASK;
3111 }
3112
Philipp Reisnerb411b362009-09-25 16:07:19 -07003113 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003114 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003115 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003116 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003117 return C_MASK;
3118 }
3119
3120 if (hg > 0) { /* become sync source. */
3121 rv = C_WF_BITMAP_S;
3122 } else if (hg < 0) { /* become sync target */
3123 rv = C_WF_BITMAP_T;
3124 } else {
3125 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003126 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003127 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003128 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003129 }
3130 }
3131
3132 return rv;
3133}
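/*
 * In effect, drbd_sync_handshake() reduces everything to the sign of hg
 * once all auto-recovery policies have run:
 *
 *	hg > 0   ->  C_WF_BITMAP_S   (this node becomes sync source)
 *	hg < 0   ->  C_WF_BITMAP_T   (this node becomes sync target)
 *	hg == 0  ->  C_CONNECTED     (no resync)
 *
 * abs(hg) >= 2 additionally forces a full sync by setting all bitmap
 * bits first, and C_MASK tells the caller to drop the connection.
 */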
3134
Philipp Reisnerf179d762011-05-16 17:31:47 +02003135static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003136{
3137 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003138 if (peer == ASB_DISCARD_REMOTE)
3139 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140
3141 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003142 if (peer == ASB_DISCARD_LOCAL)
3143 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144
3145 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003146 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147}
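/*
 * Example: a peer that sends ASB_DISCARD_REMOTE wants to discard *our*
 * data; seen from this node, that is ASB_DISCARD_LOCAL. Mirroring these
 * two values lets receive_protocol() compare both nodes' after-split-brain
 * policies within a single frame of reference; all other policies must
 * simply be equal on both sides.
 */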
3148
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003149static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003151 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003152 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3153 int p_proto, p_discard_my_data, p_two_primaries, cf;
3154 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3155 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003156 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003157 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003158
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159 p_proto = be32_to_cpu(p->protocol);
3160 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3161 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3162 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003163 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003164 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003165 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003166
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003167 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003168 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003169
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003170 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003171 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003172 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003173 if (err)
3174 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003175 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176 }
3177
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003178 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003179 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003180
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003181 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003182 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003183
3184 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003185 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003186
3187 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003188 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003189 goto disconnect_rcu_unlock;
3190 }
3191
3192 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003193 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003194 goto disconnect_rcu_unlock;
3195 }
3196
3197 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003198 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003199 goto disconnect_rcu_unlock;
3200 }
3201
3202 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003203 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003204 goto disconnect_rcu_unlock;
3205 }
3206
3207 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003208 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003209 goto disconnect_rcu_unlock;
3210 }
3211
3212 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003213 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003214 goto disconnect_rcu_unlock;
3215 }
3216
3217 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003218 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003219 goto disconnect_rcu_unlock;
3220 }
3221
3222 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223 }
3224
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003225 if (integrity_alg[0]) {
3226 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003227
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003228 /*
3229 * We can only change the peer data integrity algorithm
3230 * here. Changing our own data integrity algorithm
3231 * requires that we send a P_PROTOCOL_UPDATE packet at
3232 * the same time; otherwise, the peer has no way to
3233 * tell between which packets the algorithm should
3234 * change.
3235 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003236
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003237 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3238 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003239 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003240 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 goto disconnect;
3242 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003243
3244 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3245 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3246 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3247 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003248 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003249 goto disconnect;
3250 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251 }
3252
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003253 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3254 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003255 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003256 goto disconnect;
3257 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003258
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003259 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003260 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003261 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003262 *new_net_conf = *old_net_conf;
3263
3264 new_net_conf->wire_protocol = p_proto;
3265 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3266 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3267 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3268 new_net_conf->two_primaries = p_two_primaries;
3269
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003270 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003271 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003272 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003273
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003274 crypto_free_hash(connection->peer_integrity_tfm);
3275 kfree(connection->int_dig_in);
3276 kfree(connection->int_dig_vv);
3277 connection->peer_integrity_tfm = peer_integrity_tfm;
3278 connection->int_dig_in = int_dig_in;
3279 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003280
3281 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003282 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003283 integrity_alg[0] ? integrity_alg : "(none)");
3284
3285 synchronize_rcu();
3286 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003287 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288
Philipp Reisner44ed1672011-04-19 17:10:19 +02003289disconnect_rcu_unlock:
3290 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003291disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003292 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003293 kfree(int_dig_in);
3294 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003295 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003296 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003297}
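/*
 * The config swap above follows the usual RCU publish pattern, sketched
 * here in generic form (the mutexes only serialize writers against each
 * other):
 *
 *	new = kmalloc(sizeof(*new), GFP_KERNEL);
 *	*new = *old;			// private copy of the live config
 *	new->field = ...;		// apply the changes
 *	rcu_assign_pointer(ptr, new);	// publish atomically to readers
 *	synchronize_rcu();		// wait out all pre-existing readers
 *	kfree(old);			// no reader can still see "old"
 *
 * Readers, such as the compatibility checks earlier in this function,
 * only ever dereference the pointer under rcu_read_lock().
 */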
3298
3299/* helper function
3300 * input: alg name, feature name
3301 * return: NULL (alg name was "")
3302 * ERR_PTR(error) if something goes wrong
3303 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303304static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003305struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003306 const char *alg, const char *name)
3307{
3308 struct crypto_hash *tfm;
3309
3310 if (!alg[0])
3311 return NULL;
3312
3313 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3314 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003315		drbd_err(device, "Cannot allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003316 alg, name, PTR_ERR(tfm));
3317 return tfm;
3318 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003319 return tfm;
3320}
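/*
 * Callers distinguish the three outcomes roughly like this (a sketch
 * modeled on receive_SyncParam() below; install() is a placeholder):
 *
 *	tfm = drbd_crypto_alloc_digest_safe(device, alg, "verify-alg");
 *	if (IS_ERR(tfm))	// allocation failed, already logged
 *		goto disconnect;
 *	if (tfm)		// non-empty name, usable transform
 *		install(tfm);
 *	// NULL: alg was "", i.e. no digest configured at all
 */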
3321
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003322static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003324 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003325 int size = pi->size;
3326
3327 while (size) {
3328 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003329 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003330 if (s <= 0) {
3331 if (s < 0)
3332 return s;
3333 break;
3334 }
3335 size -= s;
3336 }
3337 if (size)
3338 return -EIO;
3339 return 0;
3340}
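/*
 * This drains pi->size payload bytes in chunks of at most
 * DRBD_SOCKET_BUFFER_SIZE, reusing the connection's receive buffer, so
 * the byte stream stays aligned on the next packet header even though
 * the payload itself is thrown away. A short read before size reaches
 * zero is reported as an error.
 */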
3341
3342/*
3343 * config_unknown_volume - device configuration command for unknown volume
3344 *
3345 * When a device is added to an existing connection, the node on which the
3346 * device is added first will send configuration commands to its peer but the
3347 * peer will not know about the device yet. It will warn and ignore these
3348 * commands. Once the device is added on the second node, the second node will
3349 * send the same device configuration commands, but in the other direction.
3350 *
3351 * (We can also end up here if drbd is misconfigured.)
3352 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003353static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003354{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003355 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003356 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003357 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003358}
3359
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003360static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003361{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003362 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003363 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003364 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003365 unsigned int header_size, data_size, exp_max_sz;
3366 struct crypto_hash *verify_tfm = NULL;
3367 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003368 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003369 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003370 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003371 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003372 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003373 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003374
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003375 peer_device = conn_peer_device(connection, pi->vnr);
3376 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003377 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003378 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003379
3380 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3381 : apv == 88 ? sizeof(struct p_rs_param)
3382 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003383 : apv <= 94 ? sizeof(struct p_rs_param_89)
3384 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003385
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003386 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003387 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003388 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003389 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390 }
3391
3392 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003393 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003394 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003395 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003396 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003397 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003398 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003399 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003400 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003401 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003402 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003403 }
3404
3405 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003406 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003407 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3408
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003409 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003410 if (err)
3411 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003412
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003413 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003414 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003415 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003416 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3417 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003418 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003419 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003420 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003421 return -ENOMEM;
3422 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003423
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003424 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003425 *new_disk_conf = *old_disk_conf;
3426
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003427 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003428 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429
3430 if (apv >= 88) {
3431 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003432 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003433 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003434				"peer wants %u, accepting only up to %u bytes\n",
3435 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003436 err = -EIO;
3437 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003438 }
3439
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003440 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003441 if (err)
3442 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003443 /* we expect NUL terminated string */
3444 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003445 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003446 p->verify_alg[data_size-1] = 0;
3447
3448 } else /* apv >= 89 */ {
3449 /* we still expect NUL terminated strings */
3450 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003451 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3452 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003453 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3454 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3455 }
3456
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003457 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003458 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003459 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003460 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003461 goto disconnect;
3462 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003463 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003464 p->verify_alg, "verify-alg");
3465 if (IS_ERR(verify_tfm)) {
3466 verify_tfm = NULL;
3467 goto disconnect;
3468 }
3469 }
3470
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003471 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003472 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003473 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003474 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003475 goto disconnect;
3476 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003477 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478 p->csums_alg, "csums-alg");
3479 if (IS_ERR(csums_tfm)) {
3480 csums_tfm = NULL;
3481 goto disconnect;
3482 }
3483 }
3484
Philipp Reisner813472c2011-05-03 16:47:02 +02003485 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003486 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3487 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3488 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3489 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003490
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003491 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003492 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003493 new_plan = fifo_alloc(fifo_size);
3494 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003495					drbd_err(device, "kmalloc of fifo_buffer failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003496 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003497 goto disconnect;
3498 }
3499 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003500 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003501
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003502 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003503 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3504 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003505 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003506 goto disconnect;
3507 }
3508
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003509 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003510
3511 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003512 strcpy(new_net_conf->verify_alg, p->verify_alg);
3513 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003514 crypto_free_hash(peer_device->connection->verify_tfm);
3515 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003516 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003517 }
3518 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003519 strcpy(new_net_conf->csums_alg, p->csums_alg);
3520 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003521 crypto_free_hash(peer_device->connection->csums_tfm);
3522 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003523 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003524 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003525 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003526 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527 }
3528
Philipp Reisner813472c2011-05-03 16:47:02 +02003529 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003530 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3531 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003532 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003533
3534 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003535 old_plan = device->rs_plan_s;
3536 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003537 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003538
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003539 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003540 synchronize_rcu();
3541 if (new_net_conf)
3542 kfree(old_net_conf);
3543 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003544 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003545
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003546 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003547
Philipp Reisner813472c2011-05-03 16:47:02 +02003548reconnect:
3549 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003550 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003551 kfree(new_disk_conf);
3552 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003553 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003554 return -EIO;
3555
Philipp Reisnerb411b362009-09-25 16:07:19 -07003556disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003557 kfree(new_plan);
3558 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003559 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003560 kfree(new_disk_conf);
3561 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003562 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003563 /* just for completeness: actually not needed,
3564 * as this is not reached if csums_tfm was ok. */
3565 crypto_free_hash(csums_tfm);
3566 /* but free the verify_tfm again, if csums_tfm did not work out */
3567 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003568 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003569 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003570}
3571
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003573static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003574 const char *s, sector_t a, sector_t b)
3575{
3576 sector_t d;
3577 if (a == 0 || b == 0)
3578 return;
3579 d = (a > b) ? (a - b) : (b - a);
3580 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003581 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003582 (unsigned long long)a, (unsigned long long)b);
3583}
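/*
 * a>>3 is a/8, i.e. 12.5%: with a = 1024 sectors, for example, the
 * warning fires once the difference exceeds 128 sectors. The check is
 * applied against both arguments, so it is symmetric in a and b.
 */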
3584
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003585static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003587 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003588 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003589 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003590 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003591 sector_t p_size, p_usize, my_usize;
3592 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003593 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003594
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003595 peer_device = conn_peer_device(connection, pi->vnr);
3596 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003597 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003598 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003599
Philipp Reisnerb411b362009-09-25 16:07:19 -07003600 p_size = be64_to_cpu(p->d_size);
3601 p_usize = be64_to_cpu(p->u_size);
3602
Philipp Reisnerb411b362009-09-25 16:07:19 -07003603 /* just store the peer's disk size for now.
3604 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003605 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003606
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003607 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003608 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003609 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003610 rcu_read_unlock();
3611
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003612 warn_if_differ_considerably(device, "lower level device sizes",
3613 p_size, drbd_get_max_capacity(device->ldev));
3614 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003615 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003616
3617 /* if this is the first connect, or an otherwise expected
3618 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003619 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003620 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621
3622 /* Never shrink a device with usable data during connect.
3623 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003624 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3625 drbd_get_capacity(device->this_bdev) &&
3626 device->state.disk >= D_OUTDATED &&
3627 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003628 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003629 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003630 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003631 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003632 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003633
3634 if (my_usize != p_usize) {
3635 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3636
3637 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3638 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003639 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003640 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003641 return -ENOMEM;
3642 }
3643
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003644 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003645 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003646 *new_disk_conf = *old_disk_conf;
3647 new_disk_conf->disk_size = p_usize;
3648
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003649 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003650 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003651 synchronize_rcu();
3652 kfree(old_disk_conf);
3653
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003654 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003655				 (unsigned long)p_usize);
3656 }
3657
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003658 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003659 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003660
Philipp Reisnere89b5912010-03-24 17:11:33 +01003661 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003662 if (get_ldev(device)) {
3663 dd = drbd_determine_dev_size(device, ddsf, NULL);
3664 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003665 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003666 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003667 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668 } else {
3669 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003670 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003671 }
3672
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003673 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3674 drbd_reconsider_max_bio_size(device);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003675
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003676 if (get_ldev(device)) {
3677 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3678 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003679 ldsc = 1;
3680 }
3681
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003682 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003683 }
3684
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003685 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003686 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003687 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003688 /* we have different sizes, probably peer
3689 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003690 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003691 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003692 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3693 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3694 if (device->state.pdsk >= D_INCONSISTENT &&
3695 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003696 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003697 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003698 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003699 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003700 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003701 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702 }
3703 }
3704
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003705 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706}
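/*
 * Size negotiation in a nutshell (as implemented above): on the initial
 * parameter exchange the smaller non-zero user-requested size wins
 * (min_not_zero); a disk holding usable data is never shrunk while still
 * connecting, only an established connection allows online shrinking;
 * and a diskless node simply adopts the peer's capacity via
 * drbd_set_my_capacity().
 */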
3707
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003708static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003710 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003711 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003712 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003713 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003714 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003715
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003716 peer_device = conn_peer_device(connection, pi->vnr);
3717 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003718 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003719 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003720
Philipp Reisnerb411b362009-09-25 16:07:19 -07003721 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003722 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003723 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003724		return -ENOMEM;
3725 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726
3727 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3728 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3729
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003730 kfree(device->p_uuid);
3731 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003733 if (device->state.conn < C_CONNECTED &&
3734 device->state.disk < D_INCONSISTENT &&
3735 device->state.role == R_PRIMARY &&
3736 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003737 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003738 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003739 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003740 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741 }
3742
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003743 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003744 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003745 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003746 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003747 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748 (p_uuid[UI_FLAGS] & 8);
3749 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003750 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003751 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003752 "clear_n_write from receive_uuids",
3753 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003754 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3755 _drbd_uuid_set(device, UI_BITMAP, 0);
3756 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003758 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003759 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003761 put_ldev(device);
3762 } else if (device->state.disk < D_INCONSISTENT &&
3763 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003764 /* I am a diskless primary, the peer just created a new current UUID
3765 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003766 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003767 }
3768
3769	/* Before we test the disk state, wait until any possibly ongoing
3770	   cluster-wide state change has finished. That is important if we are
3771	   primary and detaching from our disk: we need to see the new disk
3772	   state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003773 mutex_lock(device->state_mutex);
3774 mutex_unlock(device->state_mutex);
3775 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3776 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003777
3778 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003779 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003781 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003782}
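/*
 * The skip_initial_sync shortcut above requires all four conditions at
 * once: an established connection, protocol >= 90, our current UUID
 * still being UUID_JUST_CREATED, and bit 3 (value 8) set in the peer's
 * UI_FLAGS -- presumably the peer's matching "skip initial sync" marker.
 * Only then do both sides jump straight to D_UP_TO_DATE with a cleared
 * bitmap.
 */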
3783
3784/**
3785 * convert_state() - Converts the peer's view of the cluster state to our point of view
3786 * @ps: The state as seen by the peer.
3787 */
3788static union drbd_state convert_state(union drbd_state ps)
3789{
3790 union drbd_state ms;
3791
3792 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003793 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003794 [C_CONNECTED] = C_CONNECTED,
3795
3796 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3797 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3798 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3799 [C_VERIFY_S] = C_VERIFY_T,
3800 [C_MASK] = C_MASK,
3801 };
3802
3803 ms.i = ps.i;
3804
3805 ms.conn = c_tab[ps.conn];
3806 ms.peer = ps.role;
3807 ms.role = ps.peer;
3808 ms.pdsk = ps.disk;
3809 ms.disk = ps.pdsk;
3810 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3811
3812 return ms;
3813}
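/*
 * Worked example: a peer reporting { role=Primary, peer=Secondary,
 * disk=UpToDate, pdsk=Inconsistent, conn=C_STARTING_SYNC_S } converts
 * to our view { role=Secondary, peer=Primary, disk=Inconsistent,
 * pdsk=UpToDate, conn=C_STARTING_SYNC_T }: role and disk fields swap
 * sides, and asymmetric connection states map to their mirror image
 * via c_tab[].
 */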
3814
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003815static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003816{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003817 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003818 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003819 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003821 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003823 peer_device = conn_peer_device(connection, pi->vnr);
3824 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003825 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003826 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003827
Philipp Reisnerb411b362009-09-25 16:07:19 -07003828 mask.i = be32_to_cpu(p->mask);
3829 val.i = be32_to_cpu(p->val);
3830
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003831 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003832 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003833 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003834 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835 }
3836
3837 mask = convert_state(mask);
3838 val = convert_state(val);
3839
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003840 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003841 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003842
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003843 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003845 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846}
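/*
 * The RESOLVE_CONFLICTS test above is the tie-breaker for racing state
 * changes: if this node holds the resolver role and its own state change
 * is already in flight (state_mutex held), the peer's request is
 * answered with SS_CONCURRENT_ST_CHG instead of being applied, so only
 * one of the two concurrent requests can proceed.
 */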
3847
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003848static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003850 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003851 union drbd_state mask, val;
3852 enum drbd_state_rv rv;
3853
3854 mask.i = be32_to_cpu(p->mask);
3855 val.i = be32_to_cpu(p->val);
3856
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003857 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3858 mutex_is_locked(&connection->cstate_mutex)) {
3859 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003860 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003861 }
3862
3863 mask = convert_state(mask);
3864 val = convert_state(val);
3865
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003866 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3867 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003868
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003869 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003870}
3871
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003872static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003874 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003875 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003876 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003877 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003878 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003879 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003880 int rv;
3881
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003882 peer_device = conn_peer_device(connection, pi->vnr);
3883 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003884 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003885 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003886
Philipp Reisnerb411b362009-09-25 16:07:19 -07003887 peer_state.i = be32_to_cpu(p->state);
3888
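	/* While the peer is still attaching it reports D_NEGOTIATING; derive
	 * the disk state it will end up with from the UUID flags it sent
	 * earlier (bit 2 set means its data is inconsistent). */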
3889 real_peer_disk = peer_state.disk;
3890 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003891 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003892 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003893 }
3894
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003895 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003896 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003897 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003898 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003899
Lars Ellenberg545752d2011-12-05 14:39:25 +01003900 /* If some other part of the code (asender thread, timeout)
3901 * already decided to close the connection again,
3902 * we must not "re-establish" it here. */
3903 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003904 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003905
Lars Ellenberg40424e42011-09-26 15:24:56 +02003906 /* If this is the "end of sync" confirmation, usually the peer disk
 3907	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For an empty resync
 3908	 * (0 bits set) that started in PausedSyncT, or if the timing of pause-/
3909 * unpause-sync events has been "just right", the peer disk may
3910 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3911 */
3912 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3913 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003914 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3915 /* If we are (becoming) SyncSource, but peer is still in sync
 3916	 * preparation, ignore its uptodate-ness to avoid flapping; it
3917 * will change to inconsistent once the peer reaches active
3918 * syncing states.
3919 * It may have changed syncer-paused flags, however, so we
3920 * cannot ignore this completely. */
3921 if (peer_state.conn > C_CONNECTED &&
3922 peer_state.conn < C_SYNC_SOURCE)
3923 real_peer_disk = D_INCONSISTENT;
3924
3925 /* if peer_state changes to connected at the same time,
3926 * it explicitly notifies us that it finished resync.
3927 * Maybe we should finish it up, too? */
3928 else if (os.conn >= C_SYNC_SOURCE &&
3929 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003930 if (drbd_bm_total_weight(device) <= device->rs_failed)
3931 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003932 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003933 }
3934 }
3935
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003936 /* explicit verify finished notification, stop sector reached. */
3937 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3938 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003939 ov_out_of_sync_print(device);
3940 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003941 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003942 }
3943
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003944	 /* The peer says its disk is inconsistent while we think it is uptodate,
3945 * and this happens while the peer still thinks we have a sync going on,
3946 * but we think we are already done with the sync.
3947 * We ignore this to avoid flapping pdsk.
 3948	 * This should not happen if the peer is a recent version of drbd. */
3949 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3950 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3951 real_peer_disk = D_UP_TO_DATE;
3952
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003953 if (ns.conn == C_WF_REPORT_PARAMS)
3954 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003955
Philipp Reisner67531712010-10-27 12:21:30 +02003956 if (peer_state.conn == C_AHEAD)
3957 ns.conn = C_BEHIND;
3958
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003959 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3960 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003961 int cr; /* consider resync */
3962
3963 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003964 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965 /* if we had an established connection
3966 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003967 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003968 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003969 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003970 /* if we have both been inconsistent, and the peer has been
3971 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003972 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003973 /* if we had been plain connected, and the admin requested to
3974 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003975 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003976 (peer_state.conn >= C_STARTING_SYNC_S &&
3977 peer_state.conn <= C_WF_BITMAP_T));
3978
3979 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003980 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003982 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003983 if (ns.conn == C_MASK) {
3984 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003985 if (device->state.disk == D_NEGOTIATING) {
3986 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003987 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003988 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003990 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003991 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003992 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003993 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003994 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003995 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003996 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003997 }
3998 }
3999 }
4000
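	/* Commit the merged state under the request lock; if the local state
	 * changed while the lock was not held, start over at "retry". */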
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004001 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004002 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004003 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004004 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004005 ns.peer = peer_state.role;
4006 ns.pdsk = real_peer_disk;
4007 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004008 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004009 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004010 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004011 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4012 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004013 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004014	 for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004015 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004016 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004017 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004018 drbd_uuid_new_current(device);
4019 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004020 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004021 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004022 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004023 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4024 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004025 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004026
4027 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004028 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004029 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 }
4031
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004032 if (os.conn > C_WF_REPORT_PARAMS) {
4033 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034 peer_state.disk != D_NEGOTIATING ) {
4035 /* we want resync, peer has not yet decided to sync... */
4036 /* Nowadays only used when forcing a node into primary role and
 4037	 setting its disk to UpToDate at the same time */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004038 drbd_send_uuids(peer_device);
4039 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004040 }
4041 }
4042
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004043 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004045 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004047 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004048}
4049
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004050static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004051{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004052 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004053 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004054 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004055
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004056 peer_device = conn_peer_device(connection, pi->vnr);
4057 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004058 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004059 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004060
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004061 wait_event(device->misc_wait,
4062 device->state.conn == C_WF_SYNC_UUID ||
4063 device->state.conn == C_BEHIND ||
4064 device->state.conn < C_CONNECTED ||
4065 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004066
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004067 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004068
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069 /* Here the _drbd_uuid_ functions are right, current should
4070 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004071 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4072 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4073 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004075 drbd_print_uuids(device, "updated sync uuid");
4076 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004077
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004078 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004079 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004080 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004082 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004083}
4084
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004085/**
 4086 * receive_bitmap_plain - receive one chunk of an uncompressed bitmap transfer
4087 *
4088 * Return 0 when done, 1 when another iteration is needed, and a negative error
4089 * code upon failure.
4090 */
4091static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004092receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004093 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004094{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004095 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004096 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004097 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004098 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004099 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004100 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101
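	/* Each P_BITMAP packet carries at most data_size payload bytes: the
	 * socket buffer minus the packet header. As an illustration, a 4 KiB
	 * buffer with a 16 byte header yields 510 64-bit words per packet;
	 * the actual numbers depend on the agreed protocol version. */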
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004102 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004103 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004104 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004105 }
4106 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004107 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004108 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004109 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004110 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004112 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113
4114 c->word_offset += num_words;
4115 c->bit_offset = c->word_offset * BITS_PER_LONG;
4116 if (c->bit_offset > c->bm_bits)
4117 c->bit_offset = c->bm_bits;
4118
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004119 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120}
4121
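/* The dcbp_* helpers below decode the p_compressed_bm->encoding byte:
 * bits 0-3 hold the bitmap encoding (enum drbd_bitmap_code), bits 4-6 the
 * number of pad bits at the end of the bit stream, and bit 7 whether the
 * first run of the RLE stream describes set bits. */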
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004122static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4123{
4124 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4125}
4126
4127static int dcbp_get_start(struct p_compressed_bm *p)
4128{
4129 return (p->encoding & 0x80) != 0;
4130}
4131
4132static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4133{
4134 return (p->encoding >> 4) & 0x7;
4135}
4136
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004137/**
 4138 * recv_bm_rle_bits - decode one chunk of a VLI run-length encoded bitmap
4139 *
4140 * Return 0 when done, 1 when another iteration is needed, and a negative error
4141 * code upon failure.
4142 */
4143static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004144recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004145 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004146 struct bm_xfer_ctx *c,
4147 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004148{
4149 struct bitstream bs;
4150 u64 look_ahead;
4151 u64 rl;
4152 u64 tmp;
4153 unsigned long s = c->bit_offset;
4154 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004155 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156 int have;
4157 int bits;
4158
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004159 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160
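	/* The payload is a sequence of VLI-encoded run lengths; runs alternate
	 * between cleared and set bits, starting as indicated by the start
	 * bit. E.g. (illustrative) runs of 5, 3, 2 with start=0 decode to
	 * five cleared, three set, then two cleared bits. */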
4161 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4162 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004163 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004164
4165 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4166 bits = vli_decode_bits(&rl, look_ahead);
4167 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004168 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004169
4170 if (toggle) {
 4171			e = s + rl - 1;
4172 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004173 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004174 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004175 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004176 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004177 }
4178
4179 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004180 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004181 have, bits, look_ahead,
4182 (unsigned int)(bs.cur.b - p->code),
4183 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004184 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004186 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4187 if (likely(bits < 64))
4188 look_ahead >>= bits;
4189 else
4190 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004191 have -= bits;
4192
4193 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4194 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004195 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004196 look_ahead |= tmp << have;
4197 have += bits;
4198 }
4199
4200 c->bit_offset = s;
4201 bm_xfer_ctx_bit_to_word_offset(c);
4202
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004203 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004204}
4205
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004206/**
 4207 * decode_bitmap_c - dispatch decoding of one compressed bitmap chunk
4208 *
4209 * Return 0 when done, 1 when another iteration is needed, and a negative error
4210 * code upon failure.
4211 */
4212static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004213decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004214 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004215 struct bm_xfer_ctx *c,
4216 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004217{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004218 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004219 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004220
4221 /* other variants had been implemented for evaluation,
4222 * but have been dropped as this one turned out to be "best"
4223 * during all our tests. */
4224
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004225 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4226 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004227 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228}
4229
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004230void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231 const char *direction, struct bm_xfer_ctx *c)
4232{
4233 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004234 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004235 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4236 unsigned int plain =
4237 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4238 c->bm_words * sizeof(unsigned long);
4239 unsigned int total = c->bytes[0] + c->bytes[1];
4240 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241
 4242	/* total cannot be zero. But just in case: */
4243 if (total == 0)
4244 return;
4245
4246 /* don't report if not compressed */
4247 if (total >= plain)
4248 return;
4249
4250 /* total < plain. check for overflow, still */
4251 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4252 : (1000 * total / plain);
4253
4254 if (r > 1000)
4255 r = 1000;
4256
4257 r = 1000 - r;
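	/* r now holds the saved fraction in per mille, e.g. total = 250000
	 * against plain = 1000000 gives r = 750, reported as "75.0%". */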
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004258 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004259 "total %u; compression: %u.%u%%\n",
4260 direction,
4261 c->bytes[1], c->packets[1],
4262 c->bytes[0], c->packets[0],
4263 total, r/10, r % 10);
4264}
4265
 4266/* Since we are processing the bitfield from lower addresses to higher,
 it does not matter if we process it in 32 bit chunks or 64 bit
 chunks as long as it is little endian. (Understand it as a byte stream,
 beginning with the lowest byte...) If we used big endian,
 we would need to process it from the highest address to the lowest
 in order to be agnostic to the 32 vs 64 bit issue.
 4272
 Returns 0 on success, a negative error code otherwise. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004274static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004276 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004277 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004279 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004280
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004281 peer_device = conn_peer_device(connection, pi->vnr);
4282 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004283 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004284 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004286 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004287 /* you are supposed to send additional out-of-sync information
4288 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004289
Philipp Reisnerb411b362009-09-25 16:07:19 -07004290 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004291 .bm_bits = drbd_bm_bits(device),
4292 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293 };
4294
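	/* Receive bitmap packets until the whole bitmap has arrived: the
	 * decode helpers return 1 while more packets are expected, 0 when the
	 * last bit offset has been reached, and negative on error. */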
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004295	for (;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004296 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004297 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004298 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004299 /* MAYBE: sanity check that we speak proto >= 90,
4300 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004301 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004303 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004304 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004305 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306 goto out;
4307 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004308 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004309 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004310 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004311 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004312 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004313 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004314 if (err)
4315 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004316 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004317 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004318 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004319 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320 goto out;
4321 }
4322
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004323 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004324 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004326 if (err <= 0) {
4327 if (err < 0)
4328 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004330 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004331 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004332 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004334 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004336 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004338 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004339 enum drbd_state_rv rv;
4340
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004341 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004342 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004343 goto out;
4344 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004345 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004346 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004347 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 /* admin may have requested C_DISCONNECTING,
4349 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004350 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004351 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004352 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004353 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354
Philipp Reisnerb411b362009-09-25 16:07:19 -07004355 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004356 drbd_bm_unlock(device);
4357 if (!err && device->state.conn == C_WF_BITMAP_S)
4358 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004359 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360}
4361
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004362static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004363{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004364 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004365 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004366
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004367 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004368}
4369
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004370static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 /* Make sure we've acked all the TCP data associated
4373 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004374 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004376 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004377}
4378
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004379static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004380{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004381 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004382 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004383 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004384
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004385 peer_device = conn_peer_device(connection, pi->vnr);
4386 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004387 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004388 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004389
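	/* P_OUT_OF_SYNC marks blocks the peer wrote while we could not keep
	 * up; it only makes sense while we are the (future) sync target,
	 * hence the states accepted below. */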
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004390 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004391 case C_WF_SYNC_UUID:
4392 case C_WF_BITMAP_T:
4393 case C_BEHIND:
4394 break;
4395 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004396 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004397 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004398 }
4399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004400 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004401
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004402 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004403}
4404
Philipp Reisner02918be2010-08-20 14:35:10 +02004405struct data_cmd {
4406 int expect_payload;
4407 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004408 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409};
4410
Philipp Reisner02918be2010-08-20 14:35:10 +02004411static struct data_cmd drbd_cmd_handler[] = {
4412 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4413 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4414 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4415 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004416 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4417 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4418 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004419 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4420 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004421 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4422 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004423 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4424 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4425 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4426 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4427 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4428 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4429 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4430 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4431 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4432 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004433 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004434 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004435 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004436};
4437
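/* Receiver main loop: read a packet header, look up the handler in
 * drbd_cmd_handler[], receive the fixed-size sub-header when the handler
 * declares one, and dispatch. Any error takes the whole connection to
 * C_PROTOCOL_ERROR. */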
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004438static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004439{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004440 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004441 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004442 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004444 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004445 struct data_cmd *cmd;
4446
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004447 drbd_thread_current_set_cpu(&connection->receiver);
4448 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004449 goto err_out;
4450
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004451 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004452 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004453 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004454 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004455 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004456 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004457
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004458 shs = cmd->pkt_size;
4459 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004460 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004461 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004462 goto err_out;
4463 }
4464
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004465 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004466 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004467 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004468 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004469 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004470 }
4471
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004472 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004473 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004474 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004475 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004476 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004477 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004478 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004479 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004480
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004481 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004482 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004483}
4484
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004485static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004486{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004487 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004488 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004489 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004490
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004491 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004492 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493
Lars Ellenberg545752d2011-12-05 14:39:25 +01004494 /* We are about to start the cleanup after connection loss.
4495 * Make sure drbd_make_request knows about that.
4496 * Usually we should be in some network failure state already,
4497 * but just in case we are not, we fix it up here.
4498 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004499 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004500
Philipp Reisnerb411b362009-09-25 16:07:19 -07004501 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004502 drbd_thread_stop(&connection->asender);
4503 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004504
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004505 rcu_read_lock();
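	/* The loop below may sleep in drbd_disconnected(); each iteration
	 * takes a kref on the device so it cannot vanish while the RCU read
	 * lock is temporarily dropped. */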
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004506 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4507 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004508 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004509 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004510 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004511 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004512 rcu_read_lock();
4513 }
4514 rcu_read_unlock();
4515
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004516 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004517 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004518 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004519 atomic_set(&connection->current_epoch->epoch_size, 0);
4520 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004521
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004522 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004523
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004524 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4525 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004526
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004527 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004528 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004529 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004530 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004531
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004532 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004533
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004534 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004535 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004536}
4537
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004538static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004539{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004540 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004541 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542
Philipp Reisner85719572010-07-21 10:20:17 +02004543 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004544 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004545 _drbd_wait_ee_list_empty(device, &device->active_ee);
4546 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4547 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004548 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004549
4550 /* We do not have data structures that would allow us to
4551 * get the rs_pending_cnt down to 0 again.
4552 * * On C_SYNC_TARGET we do not have any data structures describing
4553 * the pending RSDataRequest's we have sent.
4554 * * On C_SYNC_SOURCE there is no data structure that tracks
4555 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4556 * And no, it is not the sum of the reference counts in the
4557 * resync_LRU. The resync_LRU tracks the whole operation including
4558 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4559 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004560 drbd_rs_cancel_all(device);
4561 device->rs_total = 0;
4562 device->rs_failed = 0;
4563 atomic_set(&device->rs_pending_cnt, 0);
4564 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004565
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004566 del_timer_sync(&device->resync_timer);
4567 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004568
Philipp Reisnerb411b362009-09-25 16:07:19 -07004569 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4570 * w_make_resync_request etc. which may still be on the worker queue
4571 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004572 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004573
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004574 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004575
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004576 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
 4577	 might have queued work again. The one before drbd_finish_peer_reqs() is
 4578	 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004579 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004580
Lars Ellenberg08332d72012-08-17 15:09:13 +02004581 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4582 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004583 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004584
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004585 kfree(device->p_uuid);
4586 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004588 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004589 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004590
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004591 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004592
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004593 /* serialize with bitmap writeout triggered by the state change,
4594 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004595 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004596
Philipp Reisnerb411b362009-09-25 16:07:19 -07004597 /* tcp_close and release of sendpage pages can be deferred. I don't
4598 * want to use SO_LINGER, because apparently it can be deferred for
4599 * more than 20 seconds (longest time I checked).
4600 *
 4601	 * Actually we don't care exactly when the network stack does its
4602 * put_page(), but release our reference on these pages right here.
4603 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004604 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004605 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004606 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004607 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004608 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004609 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004610 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004611 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004612 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004614 D_ASSERT(device, list_empty(&device->read_ee));
4615 D_ASSERT(device, list_empty(&device->active_ee));
4616 D_ASSERT(device, list_empty(&device->sync_ee));
4617 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618
Philipp Reisner360cc742011-02-08 14:29:53 +01004619 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004620}
4621
4622/*
4623 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4624 * we can agree on is stored in agreed_pro_version.
4625 *
4626 * feature flags and the reserved array should be enough room for future
4627 * enhancements of the handshake protocol, and possible plugins...
4628 *
4629 * for now, they are expected to be zero, but ignored.
4630 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004631static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004632{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004633 struct drbd_socket *sock;
4634 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004635
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004636 sock = &connection->data;
4637 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004638 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004639 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640 memset(p, 0, sizeof(*p));
4641 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4642 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004643 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644}
4645
4646/*
4647 * return values:
4648 * 1 yes, we have a valid connection
4649 * 0 oops, did not work out, please try again
4650 * -1 peer talks different language,
4651 * no point in trying again, please go standalone.
4652 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004653static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004655 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004656 struct p_connection_features *p;
4657 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004658 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004659 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004660
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004661 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004662 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004663 return 0;
4664
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004665 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004666 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667 return 0;
4668
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004669 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004670 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004671 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004672 return -1;
4673 }
4674
Philipp Reisner77351055b2011-02-07 17:24:26 +01004675 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004676 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004677 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004678 return -1;
4679 }
4680
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004681 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004682 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004683 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004684 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686 p->protocol_min = be32_to_cpu(p->protocol_min);
4687 p->protocol_max = be32_to_cpu(p->protocol_max);
4688 if (p->protocol_max == 0)
4689 p->protocol_max = p->protocol_min;
4690
4691 if (PRO_VERSION_MAX < p->protocol_min ||
4692 PRO_VERSION_MIN > p->protocol_max)
4693 goto incompat;
4694
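	/* The version ranges overlap; agree on the highest protocol version
	 * supported by both sides. Illustrative example: a node supporting
	 * 86-101 talking to one supporting 86-110 settles on 101. */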
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004695 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004696
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004697 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004698 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004699
4700 return 1;
4701
4702 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004703 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004704 "I support %d-%d, peer supports %d-%d\n",
4705 PRO_VERSION_MIN, PRO_VERSION_MAX,
4706 p->protocol_min, p->protocol_max);
4707 return -1;
4708}
4709
4710#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004711static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004712{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004713	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4714 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004715 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004716}
4717#else
4718#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004719
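/* CRAM-HMAC style mutual authentication: each side sends a random challenge
 * (P_AUTH_CHALLENGE) and proves knowledge of the shared secret by returning
 * the HMAC of the peer's challenge (P_AUTH_RESPONSE); the received response
 * is then compared with a locally computed digest. */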
4720/* Return value:
4721 1 - auth succeeded,
4722 0 - failed, try again (network error),
4723 -1 - auth failed, don't try again.
4724*/
4725
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004726static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004727{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004728 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004729 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4730 struct scatterlist sg;
4731 char *response = NULL;
4732 char *right_response = NULL;
4733 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004734 unsigned int key_len;
4735 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004736 unsigned int resp_size;
4737 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004738 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004739 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004740 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004742 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4743
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has unexpected size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif

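/*
 * The receiver thread: (re)establish the connection, then hand off to the
 * main receive loop drbdd().  On a retriable handshake failure (h == 0)
 * wait a second and try again; on a fatal one (h == -1) discard the
 * network configuration and go to C_DISCONNECTING.
 */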
int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

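/*
 * The got_*() handlers below run in the asender thread.  Each is handed a
 * fully received packet via struct packet_info and returns 0 on success or
 * a negative error code, which makes the asender reconnect.  Handlers that
 * act on a single volume look up their peer device through pi->vnr.
 */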
static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int * HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

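/*
 * Map a peer ACK onto the matching drbd_request: look the request up by
 * block_id and sector in the given rb_root (read or write requests) under
 * the resource's req_lock, feed the event into the request state machine,
 * and complete the master bio outside the lock if this finished it.
 */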
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
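			/* intentional fall through */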
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

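/*
 * A barrier ack closes a transfer log epoch.  Beyond releasing the epoch,
 * this is also the point where volumes in Ahead mode with no more requests
 * in flight arm their start_resync_timer to become SyncSource again.
 */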
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

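/*
 * Online-verify result from the peer: record any out-of-sync block,
 * advance the progress marks every other megabyte, and once ov_left
 * reaches zero queue w_ov_finished for the worker (falling back to
 * finishing synchronously if the allocation fails).
 */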
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

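/*
 * Drain the done_ee lists of all volumes: repeat until no volume has
 * pending peer requests left, re-checking under the resource's req_lock
 * because new entries may appear while the RCU read lock is dropped.
 * Returns 1 if finishing the peer requests of any volume failed.
 */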
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}

struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

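/*
 * Dispatch table for the meta socket, indexed by packet command.
 * pkt_size is the expected payload size; drbd_asender() checks it against
 * the received header before calling fn.
 */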
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	    = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};

int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct net_conf *nc;
	int ping_timeo, tcp_cork, ping_int;
	struct sched_param param = { .sched_priority = 2 };

	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

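		/*
		 * If a ping is due, send it and shorten the receive timeout
		 * so a missing PingAck surfaces as -EAGAIN below; with
		 * ping_timeout_active set, that is treated as a dead peer
		 * and triggers a reconnect.
		 */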
		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(connection->meta.socket, buf, expect - received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS  (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;
				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			/* validate pi.cmd before indexing into the table */
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[pi.cmd].fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			cmd = &asender_tbl[pi.cmd];
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			int err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}