blob: 9640b645d5ba7dc0d0cc1c05f42c6a6fcf1a9264 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
49
50#include "drbd_vli.h"
51
/* Decoded header of one received packet, filled in before dispatch
 * to the per-command handler. */
struct packet_info {
	enum drbd_packet cmd;	/* command code from the wire header */
	unsigned int size;	/* payload size in bytes */
	unsigned int vnr;	/* volume number the packet refers to */
	void *data;		/* NOTE(review): presumably points at the payload
				 * in the receive buffer — confirm at fill site */
};
58
/* Outcome of drbd_may_finish_epoch() for an epoch object.
 * NOTE(review): semantics inferred from names — confirm against the
 * epoch handling code. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch still referenced, left alone */
	FE_DESTROYED,	/* epoch object was freed */
	FE_RECYCLED,	/* epoch object reused for the next epoch */
};
64
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020065static int drbd_do_features(struct drbd_connection *connection);
66static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020067static int drbd_disconnected(struct drbd_peer_device *);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +020068static void conn_wait_active_ee_empty(struct drbd_connection *connection);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020069static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010070static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
73#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
74
Lars Ellenberg45bb9122010-05-14 17:10:48 +020075/*
76 * some helper functions to deal with single linked page lists,
77 * page->private being our "next" pointer.
78 */
79
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n-1 links forward.  On the "break" below, "page" is the
	 * last page of the chain we hand out and "tmp" is the first page
	 * that remains on the original list (may be NULL). */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
114
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock.
 * If @len is non-NULL, the chain length is stored through it. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
128
129static int page_chain_free(struct page *page)
130{
131 struct page *tmp;
132 int i = 0;
133 page_chain_for_each_safe(page, tmp) {
134 put_page(page);
135 ++i;
136 }
137 return i;
138}
139
/* Prepend the chain [chain_first .. chain_last] to the list at *head.
 * Locking is the responsibility of the caller. */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia: verify that chain_last really is the tail of chain_first */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
153
/* Try to grab @number pages, first from the global drbd_pp_pool, then
 * by allocating fresh pages with GFP_TRY.
 * Returns a page chain linked via page->private, or NULL if not all
 * @number pages could be obtained right now; a partial allocation is
 * given back to the pool rather than returned. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
199
/* Move peer requests at the front of device->net_ee whose pages are no
 * longer referenced onto @to_be_freed.
 * Callers hold device->resource->req_lock while calling this. */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
216
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200217static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700218{
219 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100220 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700221
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200222 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200223 reclaim_finished_net_peer_reqs(device, &reclaimed);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200224 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700225
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200226 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200227 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228}
229
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device: DRBD peer device.
 * @number: number of pages requested
 * @retry: whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	/* Read max_buffers under RCU; treat a vanished net_conf as
	 * effectively unlimited. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* Full timeout elapsed without a wakeup: stop throttling
		 * on max_buffers and just try to allocate. */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
295
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* pick the in-use counter matching how the pages were accounted */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* Keep only a bounded number of pages cached in the pool; beyond
	 * that, hand the chain back to the system. */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
324
325/*
326You need to hold the req_lock:
327 _drbd_wait_ee_list_empty()
328
329You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200330 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200331 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200332 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700333 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200334 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700335 drbd_clear_done_ee()
336 drbd_wait_ee_list_empty()
337*/
338
/* Allocate a peer request object plus, if @has_payload and @data_size,
 * a page chain large enough for @data_size bytes.
 * Returns NULL on allocation failure or injected fault; on success the
 * caller owns the object and must release it via drbd_free_peer_req(). */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	/* the object itself must not come from highmem */
	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
387
/* Release a peer request: its digest (if any), its page chain, and the
 * object itself.  @is_net selects which in-use counter the pages were
 * accounted against. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
398
/* Splice @list off under the req_lock, then free every peer request on
 * it.  Returns the number of requests freed. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries were accounted in pp_in_use_by_net */
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
416
/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 *
 * Reclaims finished net_ee entries, then runs the completion callback
 * of every peer request on done_ee and frees it.
 * Returns 0, or the first non-zero callback result encountered.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* latch the first error only */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
452
/* Wait until @head is empty.  Called with the req_lock held; the lock
 * is dropped while sleeping and re-acquired before each recheck and
 * before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
468
/* Locked wrapper around _drbd_wait_ee_list_empty(); callers must NOT
 * hold the req_lock. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
476
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100477static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700478{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700479 struct kvec iov = {
480 .iov_base = buf,
481 .iov_len = size,
482 };
483 struct msghdr msg = {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700484 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
485 };
Al Virof730c842014-02-08 21:07:38 -0500486 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700487}
488
/* Receive from the data socket.  Logs resets/shutdowns and, unless we
 * initiated the disconnect ourselves, forces the connection state to
 * C_BROKEN_PIPE on any short read.
 * Returns bytes received, 0 on EOF, or a negative error code. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;

			/* We asked for this disconnect: give the state
			 * machine ping_timeo to leave C_WF_REPORT_PARAMS,
			 * and if it does, skip the broken-pipe handling. */
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
521
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200522static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100523{
524 int err;
525
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200526 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100527 if (err != size) {
528 if (err >= 0)
529 err = -EIO;
530 } else
531 err = 0;
532 return err;
533}
534
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200535static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100536{
537 int err;
538
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200539 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100540 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200541 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100542 return err;
543}
544
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200545/* quoting tcp(7):
546 * On individual connections, the socket buffer size must be set prior to the
547 * listen(2) or connect(2) calls in order to have it take effect.
548 * This is our wrapper to do so.
549 */
550static void drbd_setbufsize(struct socket *sock, unsigned int snd,
551 unsigned int rcv)
552{
553 /* open coded SO_SNDBUF, SO_RCVBUF */
554 if (snd) {
555 sock->sk->sk_sndbuf = snd;
556 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
557 }
558 if (rcv) {
559 sock->sk->sk_rcvbuf = rcv;
560 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
561 }
562}
563
/* Actively establish a TCP connection to the configured peer address.
 * Returns the connected socket, or NULL on failure.  "Soft" failures
 * (timeout, refused, unreachable, signal) keep the current connection
 * state; any other error forces C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the tunables under RCU */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	/* clear the source port so the kernel picks a free one */
	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
651
/* Context handed to the listen socket's sk_state_change callback so an
 * incoming connection can wake up the waiter in drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* listening socket, set by prepare_listen_socket() */
	struct completion door_bell;	/* completed once a peer connection is TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk); /* saved callback, restored by unregister_state_change() */

};
659
/* Replacement sk_state_change callback installed on the listen socket by
 * prepare_listen_socket(): ring the door bell when a connection reaches
 * TCP_ESTABLISHED, then always chain to the original callback. */
static void drbd_incoming_connection(struct sock *sk)
{
	/* ad was attached to sk_user_data under sk_callback_lock */
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}
670
/* Create, bind and start the listen socket for incoming peer connections,
 * hooking its state-change callback into @ad so drbd_wait_for_connect()
 * can be woken.  Returns 0 on success, -EIO on any failure (after logging
 * and forcing C_DISCONNECTING for unexpected errors). */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot tunables under RCU; net_conf may vanish if the
	 * connection is being torn down concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	/* sockaddr_in6 is large enough to hold either address family */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	/* "what" names the failing step for the error message below */
	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Hijack sk_state_change so an established incoming connection
	 * completes ad->door_bell; the original callback is saved here and
	 * restored later by unregister_state_change(). */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	/* NOTE(review): if listen() fails, ad->s_listen is left pointing at
	 * the released socket; callers only use it after success — confirm. */
	if (err < 0) {
		/* EAGAIN/EINTR/ERESTARTSYS are expected during shutdown or
		 * signal delivery; anything else is a hard failure. */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
733
/* Undo the sk_state_change/sk_user_data hijacking done in
 * prepare_listen_socket() before handing the socket over to its user. */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
741
/* Wait (with randomized timeout) for the peer to connect to our listen
 * socket, then accept the connection.  Returns the established socket,
 * or NULL on timeout, signal, missing config, or accept failure. */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		/* configuration gone: connection is being torn down */
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter (randomly +/- one seventh of the base timeout,
	 * so the two peers' retry cycles don't stay in lock step) */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	/* door_bell is completed by drbd_incoming_connection() as soon as
	 * an incoming connection reaches TCP_ESTABLISHED */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		/* EAGAIN/EINTR/ERESTARTSYS are expected; only real errors
		 * log and force disconnect */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* restore the accepted socket's inherited callbacks */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
778
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200779static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200781static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200782 enum drbd_packet cmd)
783{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200784 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200785 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200786 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700787}
788
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200789static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700790{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200791 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200792 struct packet_info pi;
793 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700794
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200795 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200796 if (err != header_size) {
797 if (err >= 0)
798 err = -EIO;
799 return err;
800 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200801 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200802 if (err)
803 return err;
804 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700805}
806
807/**
808 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700809 * @sock: pointer to the pointer to the socket.
810 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100811static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700812{
813 int rr;
814 char tb[4];
815
816 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100817 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700818
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100819 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820
821 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100822 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700823 } else {
824 sock_release(*sock);
825 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100826 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700827 }
828}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.  Resets per-device sequence state, selects the state
   mutex matching the agreed protocol version, and sends the initial
   handshake packets.  Returns 0 or the first send error. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* fresh connection: restart packet sequence numbering with the peer */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* Peers older than protocol 100 share one connection-wide state
	 * mutex; newer peers serialize state changes per device. */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* initial handshake, stopping at the first failure */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700856
/*
 * Establish both the data and the meta socket to the peer, doing the
 * feature/auth/protocol handshake, and kick every volume's per-device
 * handshake.  Both nodes connect AND listen simultaneously; crossed
 * connection attempts are resolved by the P_INITIAL_DATA/P_INITIAL_META
 * first packets plus a coin flip.
 *
 * return values:
 * 1 yes, we have a valid connection
 * 0 oops, did not work out, please try again
 * -1 peer talks different language,
 * no point in trying again, please go standalone.
 * -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* sock = data channel, msock = meta channel; sockets filled in below */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		/* actively try to reach the peer; first success becomes the
		 * data socket, second the meta socket */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			/* both channels up: give the peer a moment, then
			 * verify neither side dropped its end */
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		/* also accept connections initiated by the peer */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once; keep
					 * the peer's, maybe retry (coin flip) */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* break symmetry between the two nodes */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	/* done listening; both channels are established */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* negotiate the protocol version (P_CONNECTION_FEATURES) */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	/* optional shared-secret challenge/response authentication */
	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* run the per-volume handshake; kref keeps each device alive while
	 * we drop the RCU read lock around the (sleeping) drbd_connected() */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1095
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001096static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001097{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001098 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001099
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001100 if (header_size == sizeof(struct p_header100) &&
1101 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1102 struct p_header100 *h = header;
1103 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001104 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001105 return -EINVAL;
1106 }
1107 pi->vnr = be16_to_cpu(h->volume);
1108 pi->cmd = be16_to_cpu(h->command);
1109 pi->size = be32_to_cpu(h->length);
1110 } else if (header_size == sizeof(struct p_header95) &&
1111 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001112 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001114 pi->size = be32_to_cpu(h->length);
1115 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001116 } else if (header_size == sizeof(struct p_header80) &&
1117 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1118 struct p_header80 *h = header;
1119 pi->cmd = be16_to_cpu(h->command);
1120 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001121 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001122 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001123 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001124 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001125 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001126 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001127 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001128 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001129 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001130}
1131
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001132static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001133{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001134 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001135 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001136
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001137 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001138 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001139 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001140
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001141 err = decode_header(connection, buffer, pi);
1142 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001143
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001144 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001145}
1146
/* Flush the backing device of every volume of this connection, if the
 * current write ordering policy uses flushes.  On the first flush failure
 * the write ordering is degraded to WO_drain_io and iteration stops. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			/* skip volumes without an attached local disk */
			if (!get_ldev(device))
				continue;
			/* take a ref so the device survives while we sleep in
			 * blkdev_issue_flush() outside the RCU read section */
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1182
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection: DRBD connection these epochs belong to.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 *
 * Walks from @epoch towards newer epochs: finishing one epoch may make its
 * successor eligible for completion as well (it then sees EV_BECAME_LAST).
 *
 * Returns FE_STILL_LIVE if nothing was finished, FE_DESTROYED if at least
 * one epoch was freed, or FE_RECYCLED if the current epoch was reset for
 * reuse instead of being freed.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* EV_CLEANUP may be or'ed into any event; strip it for the
		 * event dispatch, but honor it in the conditions below. */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do*/
			break;
		}

		/* An epoch can be finished once it has seen writes, none of
		 * them are still active, and we either know its barrier
		 * number or are cleaning up anyway. */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock around the network send */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* Not the newest epoch: free it and continue
				 * with its successor in the list. */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* Newest epoch: reset it in place for reuse. */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1258
/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @connection: DRBD connection.
 * @wo: Write ordering method to try.
 *
 * The method can only be downgraded (wo = min(pwo, wo)), never upgraded.
 * Per-volume disk_conf settings (disk_flushes, disk_drain) may force a
 * further downgrade for the whole connection.
 */
void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
{
	struct disk_conf *dc;
	struct drbd_peer_device *peer_device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = connection->write_ordering;
	/* never upgrade the ordering method, only downgrade */
	wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (!get_ldev_if_state(device, D_ATTACHING))
			continue;
		/* disk_conf is RCU-protected; we only read it here */
		dc = rcu_dereference(device->ldev->disk_conf);

		if (wo == WO_bdev_flush && !dc->disk_flushes)
			wo = WO_drain_io;
		if (wo == WO_drain_io && !dc->disk_drain)
			wo = WO_none;
		put_ldev(device);
	}
	rcu_read_unlock();
	connection->write_ordering = wo;
	if (pwo != connection->write_ordering || wo == WO_bdev_flush)
		drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
}
1297
/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:	flag field, see bio->bi_rw
 * @fault_type:	DRBD fault injection site, forwarded to
 *		drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain of allocated bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;	/* remaining data size, in bytes */
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		/* trims emulated by zeroing out the range; completion is
		 * signalled synchronously here, not via bio endio */
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 0; /* discards don't have any payload. */

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submitted in reverse allocation order */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		/* discard bios carry a size but no pages */
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* current bio is full; start another one for the
			 * remaining pages */
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	/* endio fires drbd completion once all chained bios are done */
	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* nothing was submitted yet; just drop all allocated bios */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1414
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001415static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001416 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001417{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001418 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001419
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001420 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001421 drbd_clear_interval(i);
1422
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001423 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001424 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001425 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001426}
1427
/* Wait until the active_ee list of every volume of this connection is
 * empty, i.e. all peer write requests currently submitted to the backing
 * devices have completed.
 *
 * drbd_wait_ee_list_empty() sleeps, so the RCU read lock is dropped
 * around it; the kref keeps the device alive in the meantime. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
1445
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001446static struct drbd_peer_device *
1447conn_peer_device(struct drbd_connection *connection, int volume_number)
1448{
1449 return idr_find(&connection->peer_devices, volume_number);
1450}
1451
/* Handle an incoming P_BARRIER packet: record the barrier number on the
 * current epoch, try to finish it, and prepare a fresh epoch for the
 * writes that follow.  Returns 0 on success, -EIO on protocol error. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
			/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		/* drain + flush makes the epoch boundary durable on disk */
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
		return -EIO;
	}

	/* got here via 'break': install the freshly allocated epoch */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1518
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Receive the payload of a data packet into a freshly allocated peer
 * request: optional integrity digest first, then the data pages (or, for
 * P_TRIM, no payload at all).  Returns the peer request on success, or
 * NULL on receive error, bad size, digest mismatch, or allocation
 * failure. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		/* the digest was part of pi->size; the rest is payload */
		data_size -= dgs;
	}

	if (trim) {
		/* trims carry the affected size in the packet, not payload */
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* no data to receive for a trim request */
	if (trim)
		return peer_req;

	/* receive the payload page by page into the peer request */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (dgs) {
		/* verify the received data against the received digest */
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
1610
1611/* drbd_drain_block() just takes a data block
1612 * out of the socket input buffer, and discards it.
1613 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001614static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001615{
1616 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001617 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618 void *data;
1619
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001620 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001621 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001622
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001623 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001624
1625 data = kmap(page);
1626 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001627 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1628
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001629 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001630 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001631 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001632 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001633 }
1634 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001635 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001636 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001637}
1638
/* Receive a "disk-less read" reply: the peer sends us the data for a read
 * request we could not serve locally.  The payload (after an optional
 * integrity digest) is received directly into the pages of the original
 * master bio.  Returns 0 on success, a negative error code otherwise. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		/* the digest was part of data_size; the rest is payload */
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* fill the bio's segments directly from the socket */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (dgs) {
		/* verify the received data against the received digest */
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
1686
/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 *
 * Completion callback for a resync write: marks the range in sync (or
 * records the failure) and acks the block to the peer.  Returns the
 * result of sending the ack.
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	/* resync requests are never entered into the write-conflict tree */
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	/* matches the inc_unacked() done when the request was queued */
	dec_unacked(device);

	return err;
}
1715
/*
 * recv_resync_read() - receive resync data and submit it to the local disk.
 *
 * Reads the payload off the wire via read_in_block(), queues the resulting
 * peer request on device->sync_ee and submits it as a local WRITE.
 * Completion is handled by e_end_resync_block() (set as w.cb below).
 *
 * Caller must hold a local-disk reference (get_ldev()); it is released here
 * on the failure paths, or later in the endio path on success — hence the
 * __releases(local) annotation.
 *
 * Returns 0 if the request was submitted, -EIO otherwise (which makes the
 * caller tear down the connection).
 */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	/* this block was requested by us; the request is now answered */
	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* account incoming resync traffic (in 512-byte sectors) */
	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* undo the list_add above before freeing the request */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1753
/*
 * find_request() - look up the drbd_request a peer packet refers to.
 *
 * @id is the block_id the peer echoed back to us; by protocol it is the
 * kernel pointer value of our own request object.  Before trusting it, the
 * candidate is checked for membership in @root (read_requests or
 * write_requests interval tree) at @sector, and must be marked i.local.
 *
 * Returns the request, or NULL if not found (logging an error unless
 * @missing_ok).  NOTE(review): this relies on drbd_contains_interval()
 * validating the candidate via tree search rather than blindly
 * dereferencing a bogus pointer — confirm against its implementation.
 */
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
1770
/*
 * receive_DataReply() - handle a P_DATA_REPLY packet.
 *
 * The peer answers one of our read requests with data (e.g. while our local
 * disk is unusable).  The matching request is located in
 * device->read_requests via the echoed block_id, the payload is received
 * into it with recv_dless_read(), and on success the request state machine
 * advances with DATA_RECEIVED.
 *
 * Returns 0 on success, -EIO on protocol/lookup/receive errors (tears down
 * the connection).
 */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
1805
/*
 * receive_RSDataReply() - handle a P_RS_DATA_REPLY packet (resync data).
 *
 * If the local disk is attachable (get_ldev()), the data is received and
 * submitted via recv_resync_read(); otherwise the payload is drained off
 * the socket and a P_NEG_ACK is sent so the peer can account the failure.
 * Either way the received size is added to rs_sect_in for resync
 * throughput accounting.
 *
 * Returns 0 on success, negative error otherwise.
 */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* resync replies carry the ID_SYNCER magic instead of a request id */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
1840
/*
 * restart_conflicting_writes() - requeue postponed local writes overlapping
 * [@sector, @sector + @size).
 *
 * Walks the write_requests interval tree; every local request that was
 * marked RQ_POSTPONED (and is no longer RQ_LOCAL_PENDING) is fed
 * CONFLICT_RESOLVED, which hands it to the retry workqueue.
 * Called from e_end_block() under device->resource->req_lock.
 */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}
1859
/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 *
 * "Done" callback for a mirrored write peer request (set in receive_Data).
 * If protocol requires it (EE_SEND_WRITE_ACK), sends P_RS_WRITE_ACK (when
 * the write doubles as resync progress), P_WRITE_ACK, or P_NEG_ACK on local
 * write error, then drops the unacked count.  Afterwards the request is
 * removed from the conflict-detection interval tree, postponed conflicting
 * writes are restarted if flagged, and the epoch accounting is updated.
 * Returns the drbd_send_ack() result (0 if no ack was required).
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* while we are SyncSource/SyncTarget (or paused),
			 * a successful write may also mark bits in sync */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(device);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1904
/*
 * e_send_ack() - common helper for peer-request "done" callbacks that only
 * need to send a single ack packet of type @ack and drop the unacked count.
 * Returns the drbd_send_ack() result.
 */
static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}
1917
/* "done" callback: tell the peer its write was superseded by a concurrent
 * overlapping write (two-primaries conflict resolution; queued from
 * handle_write_conflicts()). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001922
/* "done" callback: ask the peer to retry a conflicting write.  Peers older
 * than protocol version 100 do not know P_RETRY_WRITE, so P_SUPERSEDED is
 * sent to them instead. */
static int e_send_retry_write(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_connection *connection = peer_req->peer_device->connection;

	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
			     P_RETRY_WRITE : P_SUPERSEDED);
}
1932
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001933static bool seq_greater(u32 a, u32 b)
1934{
1935 /*
1936 * We assume 32-bit wrap-around here.
1937 * For 24-bit wrap-around, we would have to shift:
1938 * a <<= 8; b <<= 8;
1939 */
1940 return (s32)a - (s32)b > 0;
1941}
1942
1943static u32 seq_max(u32 a, u32 b)
1944{
1945 return seq_greater(a, b) ? a : b;
1946}
1947
/*
 * update_peer_seq() - record the newest sequence number seen from the peer.
 *
 * Only relevant when conflict resolution is active (RESOLVE_CONFLICTS set,
 * i.e. two-primaries style operation).  Updates device->peer_seq under
 * peer_seq_lock to the maximum of the stored and the received value, and
 * wakes waiters in wait_for_and_update_peer_seq() only when @peer_seq
 * actually advanced the stored value.
 */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
1963
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001964static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1965{
1966 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1967}
1968
1969/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001970static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001971{
1972 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001973 bool rv = 0;
1974
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001975 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001976 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001977 if (overlaps(peer_req->i.sector, peer_req->i.size,
1978 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001979 rv = 1;
1980 break;
1981 }
1982 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001983 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001984
1985 return rv;
1986}
1987
/* Called from receive_Data.
 * Synchronize packets on sock with packets on msock.
 *
 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
 * packet traveling on msock, they are still processed in the order they have
 * been sent.
 *
 * Note: we don't care for Ack packets overtaking P_DATA packets.
 *
 * In case packet_seq is larger than device->peer_seq number, there are
 * outstanding packets on the msock. We wait for them to arrive.
 * In case we are the logically next packet, we update device->peer_seq
 * ourselves. Correctly handles 32bit wrap around.
 *
 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
 *
 * returns 0 if we may process the packet,
 * -ETIMEDOUT if we timed out waiting for the missing packets (disconnects),
 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* without conflict resolution there is nothing to order against */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* we are the logically next packet (or an old duplicate):
		 * record the newest sequence number and proceed */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		/* woken by update_peer_seq(), or the timeout expires */
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2056
Lars Ellenberg688593c2010-11-17 22:25:03 +01002057/* see also bio_flags_to_wire()
2058 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2059 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002060static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002061{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002062 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2063 (dpf & DP_FUA ? REQ_FUA : 0) |
2064 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2065 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002066}
2067
/*
 * fail_postponed_requests() - fail all postponed local writes overlapping
 * [@sector, @sector + @size).
 *
 * Must be called with device->resource->req_lock held: the lock is
 * temporarily dropped around complete_master_bio(), and because that drop
 * may change the tree, the overlap scan restarts from scratch afterwards
 * (the "goto repeat" pattern).  Each matching request loses RQ_POSTPONED
 * and is completed with NEG_ACKED.
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

 repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
2092
/*
 * handle_write_conflicts() - resolve a peer write against concurrent local
 * (and remote) requests in the write_requests interval tree.
 *
 * Inserts @peer_req into the tree, then repeatedly scans for overlaps:
 *  - another remote request: wait for it to finish, then rescan;
 *  - resolve_conflicts set (we are the arbitrating node): the peer request
 *    is either superseded (fully contained in the overlapping request) or
 *    must be retried by the peer; the matching ack callback is queued on
 *    done_ee and -ENOENT is returned so the caller drops the request;
 *  - otherwise: wait for the discard-flag node's verdict / for the local
 *    request to finish, or mark EE_RESTART_REQUESTS so postponed local
 *    writes are restarted once this peer request completes.
 *
 * Returns 0 if the peer request may be submitted, -ENOENT if it was
 * superseded/retried, or the drbd_wait_misc() error (after which the
 * request is removed from the tree again).
 * NOTE(review): appears to require device->resource->req_lock held by the
 * caller (fail_postponed_requests() drops/retakes it) — confirm at call
 * sites.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

 repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			inc_unacked(device);
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			wake_asender(connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

 out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
2199
Philipp Reisnerb411b362009-09-25 16:07:19 -07002200/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002201static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002202{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002203 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002204 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002206 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002207 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002208 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002209 int rw = WRITE;
2210 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002211 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002212
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002213 peer_device = conn_peer_device(connection, pi->vnr);
2214 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002215 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002216 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002217
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002218 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002219 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002221 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2222 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002223 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002224 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002225 if (!err)
2226 err = err2;
2227 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002228 }
2229
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002230 /*
2231 * Corresponding put_ldev done either below (on various errors), or in
2232 * drbd_peer_request_endio, if we successfully submit the data at the
2233 * end of this function.
2234 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002235
2236 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002237 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002238 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002239 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002240 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002241 }
2242
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002243 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244
Lars Ellenberg688593c2010-11-17 22:25:03 +01002245 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002246 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002247 if (pi->cmd == P_TRIM) {
2248 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2249 peer_req->flags |= EE_IS_TRIM;
2250 if (!blk_queue_discard(q))
2251 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2252 D_ASSERT(peer_device, peer_req->i.size > 0);
2253 D_ASSERT(peer_device, rw & REQ_DISCARD);
2254 D_ASSERT(peer_device, peer_req->pages == NULL);
2255 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002256 D_ASSERT(device, peer_req->i.size == 0);
2257 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002258 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002259
2260 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002261 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002262
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002263 spin_lock(&connection->epoch_lock);
2264 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002265 atomic_inc(&peer_req->epoch->epoch_size);
2266 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002267 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002268
Philipp Reisner302bdea2011-04-21 11:36:49 +02002269 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002270 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002271 rcu_read_unlock();
2272 if (tp) {
2273 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002274 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002275 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002276 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002277 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002278 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002279 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002280 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002281 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002282 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002283 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002284 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002285 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002286 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002287 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002288 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002289 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002290 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002291 /* if we use the zeroout fallback code, we process synchronously
2292 * and we wait for all pending requests, respectively wait for
2293 * active_ee to become empty in drbd_submit_peer_request();
2294 * better not add ourselves here. */
2295 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2296 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002297 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002298
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002299 if (device->state.conn == C_SYNC_TARGET)
2300 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002301
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002302 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002303 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002304 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002305 case DRBD_PROT_C:
2306 dp_flags |= DP_SEND_WRITE_ACK;
2307 break;
2308 case DRBD_PROT_B:
2309 dp_flags |= DP_SEND_RECEIVE_ACK;
2310 break;
2311 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002312 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002313 }
2314
2315 if (dp_flags & DP_SEND_WRITE_ACK) {
2316 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002317 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002318 /* corresponding dec_unacked() in e_end_block()
2319 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002320 }
2321
2322 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002323 /* I really don't like it that the receiver thread
2324 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002325 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326 }
2327
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002328 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002330 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002331 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2332 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002333 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002334 }
2335
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002336 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002337 if (!err)
2338 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002339
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002340 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002341 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002342 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002343 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002344 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002345 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002346 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002347 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002348
Philipp Reisnerb411b362009-09-25 16:07:19 -07002349out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002350 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002351 put_ldev(device);
2352 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002353 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354}
2355
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002356/* We may throttle resync, if the lower device seems to be busy,
2357 * and current sync rate is above c_min_rate.
2358 *
2359 * To decide whether or not the lower device is busy, we use a scheme similar
2360 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2361 * (more than 64 sectors) of activity we cannot account for with our own resync
2362 * activity, it obviously is "busy".
2363 *
2364 * The current sync rate used here uses only the most recent two step marks,
2365 * to have a short time average so we can react faster.
2366 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002367bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2368{
2369 struct lc_element *tmp;
2370 bool throttle = true;
2371
2372 if (!drbd_rs_c_min_rate_throttle(device))
2373 return false;
2374
2375 spin_lock_irq(&device->al_lock);
2376 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2377 if (tmp) {
2378 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2379 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2380 throttle = false;
2381 /* Do not slow down if app IO is already waiting for this extent */
2382 }
2383 spin_unlock_irq(&device->al_lock);
2384
2385 return throttle;
2386}
2387
/* Decide whether resync should be throttled based on the configured
 * c_min_rate: returns true when the throttle feature is enabled
 * (c_min_rate != 0), the backing device shows "significant" IO activity
 * we did not cause ourselves, and the recent resync rate already exceeds
 * c_min_rate.  See the comment block above drbd_rs_should_slow_down(). */
bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	unsigned int c_min_rate;
	int curr_events;

	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return false;

	/* Sectors the block layer accounted on the backing device, minus
	 * the resync IO we submitted ourselves (rs_sect_ev): what remains
	 * is activity caused by others. */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&device->rs_sect_ev);
	/* More than 64 sectors of foreign activity since the last sample? */
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* During online verify, progress is tracked in ov_left. */
		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++; /* avoid division by zero */
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt); /* recent resync rate in KiB/s */

		if (dbdt > c_min_rate)
			return true;
	}
	return false;
}
2432
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002433static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002434{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002435 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002436 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002437 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002438 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002439 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002440 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002441 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002442 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002443 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002444
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002445 peer_device = conn_peer_device(connection, pi->vnr);
2446 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002447 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002448 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002449 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002450
2451 sector = be64_to_cpu(p->sector);
2452 size = be32_to_cpu(p->blksize);
2453
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002454 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002455 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002456 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002457 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 }
2459 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002460 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002462 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002463 }
2464
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002465 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002466 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002467 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002468 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002469 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002470 break;
2471 case P_RS_DATA_REQUEST:
2472 case P_CSUM_RS_REQUEST:
2473 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002474 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002475 break;
2476 case P_OV_REPLY:
2477 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002478 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002479 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002480 break;
2481 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002482 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002483 }
2484 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002485 drbd_err(device, "Can not satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002486 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002487
Lars Ellenberga821cc42010-09-06 12:31:37 +02002488 /* drain possibly payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002489 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002490 }
2491
2492 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2493 * "criss-cross" setup, that might cause write-out on some other DRBD,
2494 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002495 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2496 true /* has real payload */, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002497 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002498 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002499 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002500 }
2501
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002502 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002503 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002504 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002505 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002506 /* application IO, don't drbd_rs_begin_io */
2507 goto submit;
2508
Philipp Reisnerb411b362009-09-25 16:07:19 -07002509 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002510 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002511 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002512 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002513 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002514 break;
2515
2516 case P_OV_REPLY:
2517 case P_CSUM_RS_REQUEST:
2518 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002519 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002520 if (!di)
2521 goto out_free_e;
2522
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002523 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002524 di->digest = (((char *)di)+sizeof(struct digest_info));
2525
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002526 peer_req->digest = di;
2527 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002528
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002529 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002530 goto out_free_e;
2531
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002532 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002533 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002534 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002535 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002536 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002537 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002538 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002539 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002540 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002541 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002542 /* drbd_rs_begin_io done when we sent this request,
2543 * but accounting still needs to be done. */
2544 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002545 }
2546 break;
2547
2548 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002549 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002550 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002551 unsigned long now = jiffies;
2552 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002553 device->ov_start_sector = sector;
2554 device->ov_position = sector;
2555 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2556 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002557 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002558 device->rs_mark_left[i] = device->ov_left;
2559 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002560 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002561 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002562 (unsigned long long)sector);
2563 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002564 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002565 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002566 break;
2567
Philipp Reisnerb411b362009-09-25 16:07:19 -07002568 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002569 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002570 }
2571
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002572 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2573 * wrt the receiver, but it is not as straightforward as it may seem.
2574 * Various places in the resync start and stop logic assume resync
2575 * requests are processed in order, requeuing this on the worker thread
2576 * introduces a bunch of new code for synchronization between threads.
2577 *
2578 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2579 * "forever", throttling after drbd_rs_begin_io will lock that extent
2580 * for application writes for the same time. For now, just throttle
2581 * here, where the rest of the code expects the receiver to sleep for
2582 * a while, anyways.
2583 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002584
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002585 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2586 * this defers syncer requests for some time, before letting at least
2587 * on request through. The resync controller on the receiving side
2588 * will adapt to the incoming rate accordingly.
2589 *
2590 * We cannot throttle here if remote is Primary/SyncTarget:
2591 * we would also throttle its application reads.
2592 * In that case, throttling is done on the SyncTarget only.
2593 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002594 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002595 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002596 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002597 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002598
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002599submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002600 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002601
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002602submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002603 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002604 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002605 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002606 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002607
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002608 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002609 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002610
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002611 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002612 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002613 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002614 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002615 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002616 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2617
Philipp Reisnerb411b362009-09-25 16:07:19 -07002618out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002619 put_ldev(device);
2620 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002621 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002622}
2623
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002624/**
2625 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2626 */
2627static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002628{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002629 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002630 int self, peer, rv = -100;
2631 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002632 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002633
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002634 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2635 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002636
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002637 ch_peer = device->p_uuid[UI_SIZE];
2638 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002639
Philipp Reisner44ed1672011-04-19 17:10:19 +02002640 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002641 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002642 rcu_read_unlock();
2643 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002644 case ASB_CONSENSUS:
2645 case ASB_DISCARD_SECONDARY:
2646 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002647 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002648 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002649 break;
2650 case ASB_DISCONNECT:
2651 break;
2652 case ASB_DISCARD_YOUNGER_PRI:
2653 if (self == 0 && peer == 1) {
2654 rv = -1;
2655 break;
2656 }
2657 if (self == 1 && peer == 0) {
2658 rv = 1;
2659 break;
2660 }
2661 /* Else fall through to one of the other strategies... */
2662 case ASB_DISCARD_OLDER_PRI:
2663 if (self == 0 && peer == 1) {
2664 rv = 1;
2665 break;
2666 }
2667 if (self == 1 && peer == 0) {
2668 rv = -1;
2669 break;
2670 }
2671 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002672 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002673 "Using discard-least-changes instead\n");
2674 case ASB_DISCARD_ZERO_CHG:
2675 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002676 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002677 ? -1 : 1;
2678 break;
2679 } else {
2680 if (ch_peer == 0) { rv = 1; break; }
2681 if (ch_self == 0) { rv = -1; break; }
2682 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002683 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002684 break;
2685 case ASB_DISCARD_LEAST_CHG:
2686 if (ch_self < ch_peer)
2687 rv = -1;
2688 else if (ch_self > ch_peer)
2689 rv = 1;
2690 else /* ( ch_self == ch_peer ) */
2691 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002692 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002693 ? -1 : 1;
2694 break;
2695 case ASB_DISCARD_LOCAL:
2696 rv = -1;
2697 break;
2698 case ASB_DISCARD_REMOTE:
2699 rv = 1;
2700 }
2701
2702 return rv;
2703}
2704
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002705/**
2706 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2707 */
2708static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002709{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002710 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002711 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002712 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002713
Philipp Reisner44ed1672011-04-19 17:10:19 +02002714 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002715 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002716 rcu_read_unlock();
2717 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002718 case ASB_DISCARD_YOUNGER_PRI:
2719 case ASB_DISCARD_OLDER_PRI:
2720 case ASB_DISCARD_LEAST_CHG:
2721 case ASB_DISCARD_LOCAL:
2722 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002723 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002724 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002725 break;
2726 case ASB_DISCONNECT:
2727 break;
2728 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002729 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002730 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002731 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002732 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002733 rv = hg;
2734 break;
2735 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002736 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002737 break;
2738 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002739 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002741 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002742 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002743 enum drbd_state_rv rv2;
2744
Philipp Reisnerb411b362009-09-25 16:07:19 -07002745 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2746 * we might be here in C_WF_REPORT_PARAMS which is transient.
2747 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002748 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002749 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002750 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002752 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002753 rv = hg;
2754 }
2755 } else
2756 rv = hg;
2757 }
2758
2759 return rv;
2760}
2761
/**
 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
 * @peer_device: DRBD peer device of the connection on which the split-brain
 *	was detected.
 */
2765static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002767 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002768 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002769 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770
Philipp Reisner44ed1672011-04-19 17:10:19 +02002771 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002772 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002773 rcu_read_unlock();
2774 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 case ASB_DISCARD_YOUNGER_PRI:
2776 case ASB_DISCARD_OLDER_PRI:
2777 case ASB_DISCARD_LEAST_CHG:
2778 case ASB_DISCARD_LOCAL:
2779 case ASB_DISCARD_REMOTE:
2780 case ASB_CONSENSUS:
2781 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002782 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002783 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002784 break;
2785 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002786 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787 break;
2788 case ASB_DISCONNECT:
2789 break;
2790 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002791 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002792 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002793 enum drbd_state_rv rv2;
2794
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2796 * we might be here in C_WF_REPORT_PARAMS which is transient.
2797 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002798 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002799 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002800 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002801 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002802 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803 rv = hg;
2804 }
2805 } else
2806 rv = hg;
2807 }
2808
2809 return rv;
2810}
2811
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002812static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002813 u64 bits, u64 flags)
2814{
2815 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002816 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002817 return;
2818 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002819 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820 text,
2821 (unsigned long long)uuid[UI_CURRENT],
2822 (unsigned long long)uuid[UI_BITMAP],
2823 (unsigned long long)uuid[UI_HISTORY_START],
2824 (unsigned long long)uuid[UI_HISTORY_END],
2825 (unsigned long long)bits,
2826 (unsigned long long)flags);
2827}
2828
/*
  Return values of drbd_uuid_compare():
  100	after split brain, try auto recovery
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091	requires proto 91
-1096	requires proto 96
 */
/*
 * Compare our on-disk UUID set against the peer's (device->p_uuid) and
 * return a sync verdict; see the value table directly above.  *rule_nr
 * reports which rule decided, for logging.  Some rules also correct the
 * UUID history in place when one side missed a "resync finished" event
 * or a P_SYNC_UUID packet.
 */
static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* The lowest bit of every UUID is ignored in all comparisons below. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	*rule_nr = 10;
	/* Both sides freshly created: nothing to sync. */
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	*rule_nr = 20;
	/* Only we are brand new (or have no UUID): full sync, we are target. */
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	*rule_nr = 30;
	/* Only the peer is brand new (or has no UUID): full sync, we are source. */
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		/* Equal current UUIDs: the data generations match; figure out
		 * whether either side missed the end of a resync. */
		int rct, dc; /* roles at crash time */

		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
			/* Peer already cleared its bitmap UUID but we did not:
			 * we missed the "resync finished" event as SyncSource. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;	/* needs at least protocol 91 */

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				/* Retire our bitmap UUID into the history, as the
				 * finished resync would have done. */
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
			/* Mirror image of the case above: the peer (SyncSource)
			 * missed the "resync finished" event; fix its UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;	/* needs at least protocol 91 */

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary: the RESOLVE_CONFLICTS side decides. */
			dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
			return dc ? -1 : 1;
		}
	}

	*rule_nr = 50;
	/* Our current UUID matches the peer's bitmap UUID:
	 * become sync target, bitmap based (-1). */
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;	/* needs at least protocol 91 */

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	*rule_nr = 60;
	/* Our current UUID only appears in the peer's history:
	 * become sync target with a full sync (-2). */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	*rule_nr = 70;
	/* Our bitmap UUID matches the peer's current UUID:
	 * become sync source, bitmap based (1). */
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;	/* needs at least protocol 91 */

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	*rule_nr = 80;
	/* The peer's current UUID only appears in our history:
	 * become sync source with a full sync (2). */
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	*rule_nr = 90;
	/* Equal, non-zero bitmap UUIDs: split brain, try auto recovery (100). */
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	*rule_nr = 100;
	/* Histories overlap somewhere: split brain, disconnect (-100). */
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the UUID sets found at all. */
	return -1000;
}
3022
3023/* drbd_sync_handshake() returns the new conn state on success, or
3024 CONN_MASK (-1) on failure.
3025 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003026static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3027 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003028 enum drbd_disk_state peer_disk) __must_hold(local)
3029{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003030 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031 enum drbd_conns rv = C_MASK;
3032 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003033 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003034 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003035
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003036 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003037 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003038 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003039
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003040 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003041
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003042 spin_lock_irq(&device->ldev->md.uuid_lock);
3043 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3044 drbd_uuid_dump(device, "peer", device->p_uuid,
3045 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003046
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003047 hg = drbd_uuid_compare(device, &rule_nr);
3048 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003049
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003050 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003051
3052 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003053 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003054 return C_MASK;
3055 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003056 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003057 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003058 return C_MASK;
3059 }
3060
3061 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3062 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3063 int f = (hg == -100) || abs(hg) == 2;
3064 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3065 if (f)
3066 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003067 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003068 hg > 0 ? "source" : "target");
3069 }
3070
Adam Gandelman3a11a482010-04-08 16:48:23 -07003071 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003072 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003073
Philipp Reisner44ed1672011-04-19 17:10:19 +02003074 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003075 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003076
3077 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003078 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003079 + (peer_role == R_PRIMARY);
3080 int forced = (hg == -100);
3081
3082 switch (pcount) {
3083 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003084 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003085 break;
3086 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003087 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003088 break;
3089 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003090 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003091 break;
3092 }
3093 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003094 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 "automatically solved. Sync from %s node\n",
3096 pcount, (hg < 0) ? "peer" : "this");
3097 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003098 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003099 " UUIDs where ambiguous.\n");
3100 hg = hg*2;
3101 }
3102 }
3103 }
3104
3105 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003106 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003108 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003109 hg = 1;
3110
3111 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003112 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003113 "Sync from %s node\n",
3114 (hg < 0) ? "peer" : "this");
3115 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003116 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003117 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003118 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119
3120 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003121 /* FIXME this log message is not correct if we end up here
3122 * after an attempted attach on a diskless node.
3123 * We just refuse to attach -- well, we drop the "connection"
3124 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003125 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003126 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003127 return C_MASK;
3128 }
3129
3130 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003131 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003132 return C_MASK;
3133 }
3134
3135 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003136 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003137 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003138 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003139 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140 /* fall through */
3141 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003142 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143 return C_MASK;
3144 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003145 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003146 "assumption\n");
3147 }
3148 }
3149
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003150 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003151 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003152 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003153 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003154 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003155 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3156 abs(hg) >= 2 ? "full" : "bit-map based");
3157 return C_MASK;
3158 }
3159
Philipp Reisnerb411b362009-09-25 16:07:19 -07003160 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003161 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003162 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003163 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164 return C_MASK;
3165 }
3166
3167 if (hg > 0) { /* become sync source. */
3168 rv = C_WF_BITMAP_S;
3169 } else if (hg < 0) { /* become sync target */
3170 rv = C_WF_BITMAP_T;
3171 } else {
3172 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003173 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003174 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003175 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176 }
3177 }
3178
3179 return rv;
3180}
3181
Philipp Reisnerf179d762011-05-16 17:31:47 +02003182static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003183{
3184 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003185 if (peer == ASB_DISCARD_REMOTE)
3186 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003187
3188 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003189 if (peer == ASB_DISCARD_LOCAL)
3190 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003191
3192 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003193 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003194}
3195
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003196static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003197{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003198 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003199 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3200 int p_proto, p_discard_my_data, p_two_primaries, cf;
3201 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3202 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003203 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003204 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003205
Philipp Reisnerb411b362009-09-25 16:07:19 -07003206 p_proto = be32_to_cpu(p->protocol);
3207 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3208 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3209 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003211 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003212 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003213
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003214 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003215 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003216
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003217 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003218 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003219 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003220 if (err)
3221 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003222 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223 }
3224
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003225 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003226 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003227
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003228 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003229 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003230
3231 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003232 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003233
3234 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003235 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003236 goto disconnect_rcu_unlock;
3237 }
3238
3239 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003240 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003241 goto disconnect_rcu_unlock;
3242 }
3243
3244 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003245 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003246 goto disconnect_rcu_unlock;
3247 }
3248
3249 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003250 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003251 goto disconnect_rcu_unlock;
3252 }
3253
3254 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003255 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003256 goto disconnect_rcu_unlock;
3257 }
3258
3259 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003260 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003261 goto disconnect_rcu_unlock;
3262 }
3263
3264 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003265 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003266 goto disconnect_rcu_unlock;
3267 }
3268
3269 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003270 }
3271
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003272 if (integrity_alg[0]) {
3273 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003274
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003275 /*
3276 * We can only change the peer data integrity algorithm
3277 * here. Changing our own data integrity algorithm
3278 * requires that we send a P_PROTOCOL_UPDATE packet at
3279 * the same time; otherwise, the peer has no way to
3280 * tell between which packets the algorithm should
3281 * change.
3282 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003283
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003284 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3285 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003286 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003287 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288 goto disconnect;
3289 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003290
3291 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3292 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3293 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3294 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003295 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003296 goto disconnect;
3297 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003298 }
3299
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003300 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3301 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003302 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003303 goto disconnect;
3304 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003305
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003306 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003307 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003308 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003309 *new_net_conf = *old_net_conf;
3310
3311 new_net_conf->wire_protocol = p_proto;
3312 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3313 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3314 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3315 new_net_conf->two_primaries = p_two_primaries;
3316
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003317 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003318 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003319 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003320
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003321 crypto_free_hash(connection->peer_integrity_tfm);
3322 kfree(connection->int_dig_in);
3323 kfree(connection->int_dig_vv);
3324 connection->peer_integrity_tfm = peer_integrity_tfm;
3325 connection->int_dig_in = int_dig_in;
3326 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003327
3328 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003329 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003330 integrity_alg[0] ? integrity_alg : "(none)");
3331
3332 synchronize_rcu();
3333 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003334 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003335
Philipp Reisner44ed1672011-04-19 17:10:19 +02003336disconnect_rcu_unlock:
3337 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003338disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003339 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003340 kfree(int_dig_in);
3341 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003342 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003343 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003344}
3345
3346/* helper function
3347 * input: alg name, feature name
3348 * return: NULL (alg name was "")
3349 * ERR_PTR(error) if something goes wrong
3350 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303351static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003352struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003353 const char *alg, const char *name)
3354{
3355 struct crypto_hash *tfm;
3356
3357 if (!alg[0])
3358 return NULL;
3359
3360 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3361 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003362 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363 alg, name, PTR_ERR(tfm));
3364 return tfm;
3365 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003366 return tfm;
3367}
3368
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003369static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003370{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003371 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003372 int size = pi->size;
3373
3374 while (size) {
3375 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003376 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003377 if (s <= 0) {
3378 if (s < 0)
3379 return s;
3380 break;
3381 }
3382 size -= s;
3383 }
3384 if (size)
3385 return -EIO;
3386 return 0;
3387}
3388
3389/*
3390 * config_unknown_volume - device configuration command for unknown volume
3391 *
3392 * When a device is added to an existing connection, the node on which the
3393 * device is added first will send configuration commands to its peer but the
3394 * peer will not know about the device yet. It will warn and ignore these
3395 * commands. Once the device is added on the second node, the second node will
3396 * send the same device configuration commands, but in the other direction.
3397 *
3398 * (We can also end up here if drbd is misconfigured.)
3399 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003400static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003401{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003402 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003403 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003404 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003405}
3406
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003407static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003408{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003409 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003410 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003411 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003412 unsigned int header_size, data_size, exp_max_sz;
3413 struct crypto_hash *verify_tfm = NULL;
3414 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003415 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003416 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003417 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003418 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003419 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003420 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003421
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003422 peer_device = conn_peer_device(connection, pi->vnr);
3423 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003424 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003425 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003426
3427 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3428 : apv == 88 ? sizeof(struct p_rs_param)
3429 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003430 : apv <= 94 ? sizeof(struct p_rs_param_89)
3431 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003432
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003433 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003434 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003435 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003436 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003437 }
3438
3439 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003440 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003441 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003442 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003443 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003444 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003445 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003446 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003447 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003448 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003449 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003450 }
3451
3452 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003453 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3455
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003456 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003457 if (err)
3458 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003459
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003460 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003461 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003462 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003463 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3464 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003465 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003466 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003467 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003468 return -ENOMEM;
3469 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003470
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003471 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003472 *new_disk_conf = *old_disk_conf;
3473
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003474 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003475 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003476
3477 if (apv >= 88) {
3478 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003479 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003480 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003481 "peer wants %u, accepting only up to %u byte\n",
3482 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003483 err = -EIO;
3484 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003485 }
3486
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003487 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003488 if (err)
3489 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003490 /* we expect NUL terminated string */
3491 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003492 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003493 p->verify_alg[data_size-1] = 0;
3494
3495 } else /* apv >= 89 */ {
3496 /* we still expect NUL terminated strings */
3497 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003498 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3499 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003500 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3501 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3502 }
3503
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003504 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003505 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003506 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003507 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 goto disconnect;
3509 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003510 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003511 p->verify_alg, "verify-alg");
3512 if (IS_ERR(verify_tfm)) {
3513 verify_tfm = NULL;
3514 goto disconnect;
3515 }
3516 }
3517
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003518 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003519 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003520 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003521 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003522 goto disconnect;
3523 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003524 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525 p->csums_alg, "csums-alg");
3526 if (IS_ERR(csums_tfm)) {
3527 csums_tfm = NULL;
3528 goto disconnect;
3529 }
3530 }
3531
Philipp Reisner813472c2011-05-03 16:47:02 +02003532 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003533 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3534 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3535 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3536 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003537
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003538 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003539 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003540 new_plan = fifo_alloc(fifo_size);
3541 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003542 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003543 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003544 goto disconnect;
3545 }
3546 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003547 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003548
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003549 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003550 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3551 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003552 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003553 goto disconnect;
3554 }
3555
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003556 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003557
3558 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003559 strcpy(new_net_conf->verify_alg, p->verify_alg);
3560 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003561 crypto_free_hash(peer_device->connection->verify_tfm);
3562 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003563 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003564 }
3565 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003566 strcpy(new_net_conf->csums_alg, p->csums_alg);
3567 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003568 crypto_free_hash(peer_device->connection->csums_tfm);
3569 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003570 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003571 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003572 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003573 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003574 }
3575
Philipp Reisner813472c2011-05-03 16:47:02 +02003576 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003577 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3578 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003579 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003580
3581 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003582 old_plan = device->rs_plan_s;
3583 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003584 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003585
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003586 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003587 synchronize_rcu();
3588 if (new_net_conf)
3589 kfree(old_net_conf);
3590 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003591 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003592
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003593 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003594
Philipp Reisner813472c2011-05-03 16:47:02 +02003595reconnect:
3596 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003597 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003598 kfree(new_disk_conf);
3599 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003600 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003601 return -EIO;
3602
Philipp Reisnerb411b362009-09-25 16:07:19 -07003603disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003604 kfree(new_plan);
3605 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003606 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003607 kfree(new_disk_conf);
3608 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003609 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003610 /* just for completeness: actually not needed,
3611 * as this is not reached if csums_tfm was ok. */
3612 crypto_free_hash(csums_tfm);
3613 /* but free the verify_tfm again, if csums_tfm did not work out */
3614 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003615 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003616 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003617}
3618
Philipp Reisnerb411b362009-09-25 16:07:19 -07003619/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003620static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621 const char *s, sector_t a, sector_t b)
3622{
3623 sector_t d;
3624 if (a == 0 || b == 0)
3625 return;
3626 d = (a > b) ? (a - b) : (b - a);
3627 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003628 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003629 (unsigned long long)a, (unsigned long long)b);
3630}
3631
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003632static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003634 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003635 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003636 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003637 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003638 sector_t p_size, p_usize, my_usize;
3639 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003640 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003642 peer_device = conn_peer_device(connection, pi->vnr);
3643 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003644 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003645 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003646
Philipp Reisnerb411b362009-09-25 16:07:19 -07003647 p_size = be64_to_cpu(p->d_size);
3648 p_usize = be64_to_cpu(p->u_size);
3649
Philipp Reisnerb411b362009-09-25 16:07:19 -07003650 /* just store the peer's disk size for now.
3651 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003652 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003654 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003655 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003656 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003657 rcu_read_unlock();
3658
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003659 warn_if_differ_considerably(device, "lower level device sizes",
3660 p_size, drbd_get_max_capacity(device->ldev));
3661 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003662 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663
3664 /* if this is the first connect, or an otherwise expected
3665 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003666 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003667 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668
3669 /* Never shrink a device with usable data during connect.
3670 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003671 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3672 drbd_get_capacity(device->this_bdev) &&
3673 device->state.disk >= D_OUTDATED &&
3674 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003675 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003676 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003677 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003678 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003679 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003680
3681 if (my_usize != p_usize) {
3682 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3683
3684 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3685 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003686 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003687 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003688 return -ENOMEM;
3689 }
3690
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003691 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003692 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003693 *new_disk_conf = *old_disk_conf;
3694 new_disk_conf->disk_size = p_usize;
3695
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003696 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003697 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003698 synchronize_rcu();
3699 kfree(old_disk_conf);
3700
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003701 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003702 (unsigned long)my_usize);
3703 }
3704
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003705 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003707
Philipp Reisnere89b5912010-03-24 17:11:33 +01003708 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003709 if (get_ldev(device)) {
3710 dd = drbd_determine_dev_size(device, ddsf, NULL);
3711 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003712 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003713 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003714 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003715 } else {
3716 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003717 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003718 }
3719
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003720 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3721 drbd_reconsider_max_bio_size(device);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003722
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003723 if (get_ldev(device)) {
3724 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3725 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726 ldsc = 1;
3727 }
3728
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003729 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003730 }
3731
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003732 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003734 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003735 /* we have different sizes, probably peer
3736 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003737 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003738 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003739 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3740 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3741 if (device->state.pdsk >= D_INCONSISTENT &&
3742 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003743 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003744 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003745 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003746 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003747 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003748 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003749 }
3750 }
3751
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003752 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003753}
3754
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003755static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003756{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003757 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003758 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003759 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003761 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003762
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003763 peer_device = conn_peer_device(connection, pi->vnr);
3764 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003765 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003766 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003767
Philipp Reisnerb411b362009-09-25 16:07:19 -07003768 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003769 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003770 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003771 return false;
3772 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773
3774 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3775 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3776
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003777 kfree(device->p_uuid);
3778 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003779
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003780 if (device->state.conn < C_CONNECTED &&
3781 device->state.disk < D_INCONSISTENT &&
3782 device->state.role == R_PRIMARY &&
3783 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003784 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003785 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003786 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003787 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003788 }
3789
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003790 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003792 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003793 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003794 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795 (p_uuid[UI_FLAGS] & 8);
3796 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003797 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003798 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003799 "clear_n_write from receive_uuids",
3800 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003801 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3802 _drbd_uuid_set(device, UI_BITMAP, 0);
3803 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003805 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003806 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003807 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003808 put_ldev(device);
3809 } else if (device->state.disk < D_INCONSISTENT &&
3810 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003811 /* I am a diskless primary, the peer just created a new current UUID
3812 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003813 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814 }
3815
3816 /* Before we test for the disk state, we should wait until an eventually
3817 ongoing cluster wide state change is finished. That is important if
3818 we are primary and are detaching from our disk. We need to see the
3819 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003820 mutex_lock(device->state_mutex);
3821 mutex_unlock(device->state_mutex);
3822 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3823 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003824
3825 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003826 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003828 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829}
3830
3831/**
3832 * convert_state() - Converts the peer's view of the cluster state to our point of view
3833 * @ps: The state as seen by the peer.
3834 */
3835static union drbd_state convert_state(union drbd_state ps)
3836{
3837 union drbd_state ms;
3838
3839 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003840 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841 [C_CONNECTED] = C_CONNECTED,
3842
3843 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3844 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3845 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3846 [C_VERIFY_S] = C_VERIFY_T,
3847 [C_MASK] = C_MASK,
3848 };
3849
3850 ms.i = ps.i;
3851
3852 ms.conn = c_tab[ps.conn];
3853 ms.peer = ps.role;
3854 ms.role = ps.peer;
3855 ms.pdsk = ps.disk;
3856 ms.disk = ps.pdsk;
3857 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3858
3859 return ms;
3860}
3861
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003862static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003863{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003864 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003865 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003866 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003867 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003868 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003869
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003870 peer_device = conn_peer_device(connection, pi->vnr);
3871 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003872 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003873 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003874
Philipp Reisnerb411b362009-09-25 16:07:19 -07003875 mask.i = be32_to_cpu(p->mask);
3876 val.i = be32_to_cpu(p->val);
3877
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003878 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003879 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003880 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003881 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003882 }
3883
3884 mask = convert_state(mask);
3885 val = convert_state(val);
3886
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003887 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003888 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003889
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003890 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003891
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003892 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003893}
3894
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003895static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003896{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003897 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003898 union drbd_state mask, val;
3899 enum drbd_state_rv rv;
3900
3901 mask.i = be32_to_cpu(p->mask);
3902 val.i = be32_to_cpu(p->val);
3903
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003904 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3905 mutex_is_locked(&connection->cstate_mutex)) {
3906 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003907 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003908 }
3909
3910 mask = convert_state(mask);
3911 val = convert_state(val);
3912
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003913 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3914 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003915
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003916 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003917}
3918
/* Handle a P_STATE packet: merge the peer's reported state into our own
 * state, possibly starting a resync handshake.
 * Returns 0 on success, a negative error code when the connection must be
 * torn down. */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is attaching (D_NEGOTIATING), derive its effective
	 * disk state from the inconsistent flag in the received UUIDs. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* snapshot the local state; ns is modified below, os stays as the
	 * reference for the compare-and-retry at the bottom */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer entered congestion-avoidance mode: we become the lagging side */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from the handshake means: no usable common state found */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* if the local state changed while we were not holding req_lock,
	 * start over; the goto jumps back with req_lock still held */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	/* re-read: _drbd_set_state() may have adjusted the state further */
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4096
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004097static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004098{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004099 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004100 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004101 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004102
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004103 peer_device = conn_peer_device(connection, pi->vnr);
4104 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004105 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004106 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004107
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004108 wait_event(device->misc_wait,
4109 device->state.conn == C_WF_SYNC_UUID ||
4110 device->state.conn == C_BEHIND ||
4111 device->state.conn < C_CONNECTED ||
4112 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004114 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004115
Philipp Reisnerb411b362009-09-25 16:07:19 -07004116 /* Here the _drbd_uuid_ functions are right, current should
4117 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004118 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4119 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4120 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004122 drbd_print_uuids(device, "updated sync uuid");
4123 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004124
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004125 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004126 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004127 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004128
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004129 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004130}
4131
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004132/**
4133 * receive_bitmap_plain
4134 *
4135 * Return 0 when done, 1 when another iteration is needed, and a negative error
4136 * code upon failure.
4137 */
4138static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004139receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004140 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004141{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004142 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004143 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004144 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004145 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004146 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004147 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004148
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004149 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004150 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004151 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004152 }
4153 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004154 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004155 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004156 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004157 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004159 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160
4161 c->word_offset += num_words;
4162 c->bit_offset = c->word_offset * BITS_PER_LONG;
4163 if (c->bit_offset > c->bm_bits)
4164 c->bit_offset = c->bm_bits;
4165
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004166 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167}
4168
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004169static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4170{
4171 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4172}
4173
4174static int dcbp_get_start(struct p_compressed_bm *p)
4175{
4176 return (p->encoding & 0x80) != 0;
4177}
4178
4179static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4180{
4181 return (p->encoding >> 4) & 0x7;
4182}
4183
/**
 * recv_bm_rle_bits - decode one VLI/RLE compressed bitmap packet
 * @peer_device: peer device this bitmap transfer belongs to
 * @p: the compressed bitmap packet
 * @c: transfer context, tracks the current position within the bitmap
 * @len: length of the RLE payload in bytes
 *
 * The payload is a sequence of variable-length-encoded run lengths;
 * runs alternate between cleared and set bits, the value of the first
 * run is given by dcbp_get_start().
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* 64-bit decode window, refilled from the bitstream */
	u64 rl;		/* current run length in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* current bit position */
	unsigned long e;			/* end bit of the current run */
	int toggle = dcbp_get_start(p);	/* non-zero: current run is "set" */
	int have;	/* number of valid bits in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		/* decode the next run length from the look-ahead window */
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* a code longer than the remaining valid bits means the
		 * stream is corrupt */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the bitstream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done only once the runs cover the whole bitmap */
	return (s != c->bm_bits);
}
4252
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004253/**
4254 * decode_bitmap_c
4255 *
4256 * Return 0 when done, 1 when another iteration is needed, and a negative error
4257 * code upon failure.
4258 */
4259static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004260decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004261 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004262 struct bm_xfer_ctx *c,
4263 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004265 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004266 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004267
4268 /* other variants had been implemented for evaluation,
4269 * but have been dropped as this one turned out to be "best"
4270 * during all our tests. */
4271
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004272 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4273 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004274 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275}
4276
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004277void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278 const char *direction, struct bm_xfer_ctx *c)
4279{
4280 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004281 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004282 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4283 unsigned int plain =
4284 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4285 c->bm_words * sizeof(unsigned long);
4286 unsigned int total = c->bytes[0] + c->bytes[1];
4287 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288
4289 /* total can not be zero. but just in case: */
4290 if (total == 0)
4291 return;
4292
4293 /* don't report if not compressed */
4294 if (total >= plain)
4295 return;
4296
4297 /* total < plain. check for overflow, still */
4298 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4299 : (1000 * total / plain);
4300
4301 if (r > 1000)
4302 r = 1000;
4303
4304 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004305 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306 "total %u; compression: %u.%u%%\n",
4307 direction,
4308 c->bytes[1], c->packets[1],
4309 c->bytes[0], c->packets[0],
4310 total, r/10, r % 10);
4311}
4312
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as a byte stream,
   beginning with the lowest byte...) If we used big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004321static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004322{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004323 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004324 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004326 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004327
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004328 peer_device = conn_peer_device(connection, pi->vnr);
4329 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004330 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004331 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004333 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004334 /* you are supposed to send additional out-of-sync information
4335 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004336
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004338 .bm_bits = drbd_bm_bits(device),
4339 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340 };
4341
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004342 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004343 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004344 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004345 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004346 /* MAYBE: sanity check that we speak proto >= 90,
4347 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004348 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004349
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004350 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004351 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004352 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004353 goto out;
4354 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004355 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004356 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004357 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004358 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004359 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004360 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004361 if (err)
4362 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004363 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004365 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004366 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367 goto out;
4368 }
4369
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004370 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004371 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004373 if (err <= 0) {
4374 if (err < 0)
4375 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004376 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004377 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004378 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004379 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004381 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004383 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004384
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004385 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004386 enum drbd_state_rv rv;
4387
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004388 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004389 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004390 goto out;
4391 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004392 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004393 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004394 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004395 /* admin may have requested C_DISCONNECTING,
4396 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004397 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004398 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004399 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004400 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004401
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004403 drbd_bm_unlock(device);
4404 if (!err && device->state.conn == C_WF_BITMAP_S)
4405 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004406 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407}
4408
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004409static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004410{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004411 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004412 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004413
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004414 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415}
4416
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004417static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419 /* Make sure we've acked all the TCP data associated
4420 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004421 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004423 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004424}
4425
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004426static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004427{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004428 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004429 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004430 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004431
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004432 peer_device = conn_peer_device(connection, pi->vnr);
4433 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004434 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004435 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004436
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004437 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004438 case C_WF_SYNC_UUID:
4439 case C_WF_BITMAP_T:
4440 case C_BEHIND:
4441 break;
4442 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004443 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004444 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004445 }
4446
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004447 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004448
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004449 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004450}
4451
/* One dispatch-table entry for a packet type received on the data socket. */
struct data_cmd {
	int expect_payload;	/* nonzero: packet may carry data beyond pkt_size */
	size_t pkt_size;	/* fixed (sub-)header size read before fn is called */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* handler */
};
4457
/* Packet-type -> handler dispatch table for the data socket, indexed by
 * packet command.  Entries are { expect_payload, fixed header size, handler };
 * unlisted packet types have a NULL handler and are treated as protocol
 * errors by drbdd(). */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};
4485
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004486static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004488 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004489 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004490 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004491
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004492 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004493 struct data_cmd *cmd;
4494
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004495 drbd_thread_current_set_cpu(&connection->receiver);
4496 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004497 goto err_out;
4498
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004499 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004500 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004501 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004502 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004503 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004504 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004505
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004506 shs = cmd->pkt_size;
4507 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004508 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004509 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004510 goto err_out;
4511 }
4512
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004513 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004514 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004515 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004516 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004517 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004518 }
4519
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004520 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004521 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004522 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004523 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004524 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004525 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004527 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004528
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004529 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004530 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531}
4532
/*
 * conn_disconnect() - tear down a lost connection and clean up all volumes
 *
 * Called after the network link is gone.  Stops the asender thread, closes
 * the sockets, runs per-volume cleanup via drbd_disconnected(), resets
 * epoch bookkeeping, possibly fences the peer, and finally moves the
 * connection state towards C_UNCONNECTED (or C_STANDALONE if an
 * administrative disconnect was requested).
 */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->asender);
	drbd_free_sock(connection);

	/* Per-volume cleanup.  drbd_disconnected() can sleep (it waits for
	 * ee lists and bitmap IO), so we must not stay inside the RCU read
	 * section across it: pin the device with a kref, drop RCU, do the
	 * work, then re-enter RCU to continue the iteration. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If any volume is Primary and the peer's disk state is unknown or
	 * worse, try to outdate/fence the peer asynchronously. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	/* Sample cstate and move towards C_UNCONNECTED under the req_lock. */
	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4585
/*
 * drbd_disconnected() - per-volume cleanup after the connection was lost
 *
 * Waits for in-flight peer requests to drain, cancels all resync activity,
 * flushes the sender workqueue, drops the peer's UUIDs, clears the transfer
 * log (unless IO is suspended), syncs the meta data, and releases pages
 * still referenced by the network stack.  Returns 0.
 */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 * * On C_SYNC_TARGET we do not have any data structures describing
	 * the pending RSDataRequest's we have sent.
	 * * On C_SYNC_SOURCE there is no data structure that tracks
	 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 * And no, it is not the sum of the reference counts in the
	 * resync_LRU. The resync_LRU tracks the whole operation including
	 * the disk-IO, while the rs_pending_cnt only tracks the blocks
	 * on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* stop the resync timer and run its function once more by hand */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* forget the peer's UUIDs; they must be re-exchanged on reconnect */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4669
4670/*
4671 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4672 * we can agree on is stored in agreed_pro_version.
4673 *
4674 * feature flags and the reserved array should be enough room for future
4675 * enhancements of the handshake protocol, and possible plugins...
4676 *
4677 * for now, they are expected to be zero, but ignored.
4678 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004679static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004680{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004681 struct drbd_socket *sock;
4682 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004683
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004684 sock = &connection->data;
4685 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004686 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004687 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688 memset(p, 0, sizeof(*p));
4689 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4690 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004691 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692}
4693
4694/*
4695 * return values:
4696 * 1 yes, we have a valid connection
4697 * 0 oops, did not work out, please try again
4698 * -1 peer talks different language,
4699 * no point in trying again, please go standalone.
4700 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004701static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004703 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004704 struct p_connection_features *p;
4705 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004706 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004707 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004708
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004709 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004710 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004711 return 0;
4712
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004713 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004714 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004715 return 0;
4716
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004717 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004718 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004719 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004720 return -1;
4721 }
4722
Philipp Reisner77351055b2011-02-07 17:24:26 +01004723 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004724 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004725 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004726 return -1;
4727 }
4728
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004729 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004730 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004731 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004733
Philipp Reisnerb411b362009-09-25 16:07:19 -07004734 p->protocol_min = be32_to_cpu(p->protocol_min);
4735 p->protocol_max = be32_to_cpu(p->protocol_max);
4736 if (p->protocol_max == 0)
4737 p->protocol_max = p->protocol_min;
4738
4739 if (PRO_VERSION_MAX < p->protocol_min ||
4740 PRO_VERSION_MIN > p->protocol_max)
4741 goto incompat;
4742
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004743 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004744
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004745 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004746 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004747
4748 return 1;
4749
4750 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004751 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004752 "I support %d-%d, peer supports %d-%d\n",
4753 PRO_VERSION_MIN, PRO_VERSION_MAX,
4754 p->protocol_min, p->protocol_max);
4755 return -1;
4756}
4757
4758#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks HMAC support: shared-secret
 * authentication cannot work, so fail permanently (-1 = don't retry). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* fixed message typo: "was build" -> "was built" */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4765#else
4766#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004767
4768/* Return value:
4769 1 - auth succeeded,
4770 0 - failed, try again (network error),
4771 -1 - auth failed, don't try again.
4772*/
4773
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004774static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004775{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004776 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004777 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4778 struct scatterlist sg;
4779 char *response = NULL;
4780 char *right_response = NULL;
4781 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004782 unsigned int key_len;
4783 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004784 unsigned int resp_size;
4785 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004786 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004787 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004788 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004789
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004790 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4791
Philipp Reisner44ed1672011-04-19 17:10:19 +02004792 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004793 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02004794 key_len = strlen(nc->shared_secret);
4795 memcpy(secret, nc->shared_secret, key_len);
4796 rcu_read_unlock();
4797
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004798 desc.tfm = connection->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004799 desc.flags = 0;
4800
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004801 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004802 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004803 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004804 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004805 goto fail;
4806 }
4807
4808 get_random_bytes(my_challenge, CHALLENGE_LEN);
4809
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004810 sock = &connection->data;
4811 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004812 rv = 0;
4813 goto fail;
4814 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004815 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004816 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004817 if (!rv)
4818 goto fail;
4819
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004820 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004821 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004822 rv = 0;
4823 goto fail;
4824 }
4825
Philipp Reisner77351055b2011-02-07 17:24:26 +01004826 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004827 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004828 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004829 rv = 0;
4830 goto fail;
4831 }
4832
Philipp Reisner77351055b2011-02-07 17:24:26 +01004833 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004834 drbd_err(connection, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004835 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004836 goto fail;
4837 }
4838
Philipp Reisner77351055b2011-02-07 17:24:26 +01004839 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004840 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004841 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004842 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004843 goto fail;
4844 }
4845
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004846 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004847 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848 rv = 0;
4849 goto fail;
4850 }
4851
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004852 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004853 response = kmalloc(resp_size, GFP_NOIO);
4854 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004855 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004856 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004857 goto fail;
4858 }
4859
4860 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004861 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862
4863 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4864 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004865 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004866 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004867 goto fail;
4868 }
4869
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004870 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004871 rv = 0;
4872 goto fail;
4873 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004874 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004875 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004876 if (!rv)
4877 goto fail;
4878
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004879 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004880 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004881 rv = 0;
4882 goto fail;
4883 }
4884
Philipp Reisner77351055b2011-02-07 17:24:26 +01004885 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004886 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004887 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004888 rv = 0;
4889 goto fail;
4890 }
4891
Philipp Reisner77351055b2011-02-07 17:24:26 +01004892 if (pi.size != resp_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004893 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004894 rv = 0;
4895 goto fail;
4896 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004897
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004898 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004899 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004900 rv = 0;
4901 goto fail;
4902 }
4903
4904 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004905 if (right_response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004906 drbd_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004907 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004908 goto fail;
4909 }
4910
4911 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4912
4913 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4914 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004915 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004916 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004917 goto fail;
4918 }
4919
4920 rv = !memcmp(response, right_response, resp_size);
4921
4922 if (rv)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004923 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02004924 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004925 else
4926 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004927
4928 fail:
4929 kfree(peers_ch);
4930 kfree(response);
4931 kfree(right_response);
4932
4933 return rv;
4934}
4935#endif
4936
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02004937int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004938{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004939 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004940 int h;
4941
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004942 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004943
4944 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004945 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004946 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004947 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004948 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004949 }
4950 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004951 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004952 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004953 }
4954 } while (h == 0);
4955
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004956 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004957 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004958
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004959 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004960
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004961 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004962 return 0;
4963}
4964
4965/* ********* acknowledge sender ******** */
4966
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004967static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004968{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004969 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004970 int retcode = be32_to_cpu(p->retcode);
4971
4972 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004973 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004974 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004975 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004976 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004977 drbd_set_st_err_str(retcode), retcode);
4978 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004979 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004980
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004981 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004982}
4983
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004984static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004985{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004986 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004987 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004988 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004989 int retcode = be32_to_cpu(p->retcode);
4990
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004991 peer_device = conn_peer_device(connection, pi->vnr);
4992 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004993 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004994 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01004995
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004996 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004997 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004998 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01004999 }
5000
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005001 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005002 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005003 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005004 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005005 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005006 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005007 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005008 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005009
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005010 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005011}
5012
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005013static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005014{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005015 return drbd_send_ping_ack(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005016
5017}
5018
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005019static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005020{
5021 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005022 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5023 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5024 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005025
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005026 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005027}
5028
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005029static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005030{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005031 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005032 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005033 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005034 sector_t sector = be64_to_cpu(p->sector);
5035 int blksize = be32_to_cpu(p->blksize);
5036
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005037 peer_device = conn_peer_device(connection, pi->vnr);
5038 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005039 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005040 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005041
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005042 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005043
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005044 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005045
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005046 if (get_ldev(device)) {
5047 drbd_rs_complete_io(device, sector);
5048 drbd_set_in_sync(device, sector, blksize);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005049 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005050 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5051 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005052 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005053 dec_rs_pending(device);
5054 atomic_add(blksize >> 9, &device->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005055
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005056 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005057}
5058
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005059static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005060validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005061 struct rb_root *root, const char *func,
5062 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005063{
5064 struct drbd_request *req;
5065 struct bio_and_error m;
5066
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005067 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005068 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005069 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005070 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005071 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005072 }
5073 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005074 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005075
5076 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005077 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005078 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005079}
5080
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005081static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005082{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005083 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005084 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005085 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005086 sector_t sector = be64_to_cpu(p->sector);
5087 int blksize = be32_to_cpu(p->blksize);
5088 enum drbd_req_event what;
5089
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005090 peer_device = conn_peer_device(connection, pi->vnr);
5091 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005092 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005093 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005094
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005095 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005096
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005097 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005098 drbd_set_in_sync(device, sector, blksize);
5099 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005100 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005101 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005102 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005103 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005104 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005105 break;
5106 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005107 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005108 break;
5109 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005110 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005111 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005112 case P_SUPERSEDED:
5113 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005114 break;
5115 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005116 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005117 break;
5118 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005119 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005120 }
5121
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005122 return validate_req_change_req_state(device, p->block_id, sector,
5123 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005124 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005125}
5126
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005127static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005128{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005129 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005130 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005131 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005132 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005133 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005134 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005135
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005136 peer_device = conn_peer_device(connection, pi->vnr);
5137 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005138 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005139 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005140
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005141 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005143 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005144 dec_rs_pending(device);
5145 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005146 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005147 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005148
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005149 err = validate_req_change_req_state(device, p->block_id, sector,
5150 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005151 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005152 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005153 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5154 The master bio might already be completed, therefore the
5155 request is no longer in the collision hash. */
5156 /* In Protocol B we might already have got a P_RECV_ACK
5157 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005158 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005159 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005160 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005161}
5162
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005163static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005164{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005165 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005166 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005167 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005168 sector_t sector = be64_to_cpu(p->sector);
5169
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005170 peer_device = conn_peer_device(connection, pi->vnr);
5171 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005172 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005173 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005174
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005175 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005176
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005177 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005178 (unsigned long long)sector, be32_to_cpu(p->blksize));
5179
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005180 return validate_req_change_req_state(device, p->block_id, sector,
5181 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005182 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005183}
5184
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005185static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005186{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005187 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005188 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005189 sector_t sector;
5190 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005191 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005192
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005193 peer_device = conn_peer_device(connection, pi->vnr);
5194 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005195 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005196 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005197
5198 sector = be64_to_cpu(p->sector);
5199 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005200
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005201 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005202
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005203 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005204
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005205 if (get_ldev_if_state(device, D_FAILED)) {
5206 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005207 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005208 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005209 drbd_rs_failed_io(device, sector, size);
Philipp Reisnerd612d302010-12-27 10:53:28 +01005210 case P_RS_CANCEL:
5211 break;
5212 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005213 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005214 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005215 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005216 }
5217
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005218 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005219}
5220
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005221static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005222{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005223 struct p_barrier_ack *p = pi->data;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005224 struct drbd_peer_device *peer_device;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005225 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005226
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005227 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005228
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005229 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005230 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5231 struct drbd_device *device = peer_device->device;
5232
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005233 if (device->state.conn == C_AHEAD &&
5234 atomic_read(&device->ap_in_flight) == 0 &&
5235 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5236 device->start_resync_timer.expires = jiffies + HZ;
5237 add_timer(&device->start_resync_timer);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005238 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005239 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005240 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005241
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005242 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005243}
5244
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005245static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005246{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005247 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005248 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005249 struct p_block_ack *p = pi->data;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005250 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005251 sector_t sector;
5252 int size;
5253
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005254 peer_device = conn_peer_device(connection, pi->vnr);
5255 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005256 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005257 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005258
Philipp Reisnerb411b362009-09-25 16:07:19 -07005259 sector = be64_to_cpu(p->sector);
5260 size = be32_to_cpu(p->blksize);
5261
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005262 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005263
5264 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005265 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005266 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005267 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005268
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005269 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005270 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005271
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005272 drbd_rs_complete_io(device, sector);
5273 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005274
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005275 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005276
5277 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005278 if ((device->ov_left & 0x200) == 0x200)
5279 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005280
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005281 if (device->ov_left == 0) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005282 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5283 if (dw) {
5284 dw->w.cb = w_ov_finished;
5285 dw->device = device;
5286 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005287 } else {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005288 drbd_err(device, "kmalloc(dw) failed.");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005289 ov_out_of_sync_print(device);
5290 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005291 }
5292 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005293 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005294 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005295}
5296
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005297static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005298{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005299 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005300}
5301
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005302static int connection_finish_peer_reqs(struct drbd_connection *connection)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005303{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005304 struct drbd_peer_device *peer_device;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005305 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005306
5307 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005308 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner32862ec2011-02-08 16:41:01 +01005309 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005310
5311 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005312 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5313 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005314 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005315 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005316 if (drbd_finish_peer_reqs(device)) {
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005317 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005318 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005319 }
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005320 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005321 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005322 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005323 set_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005324
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005325 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005326 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5327 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005328 not_empty = !list_empty(&device->done_ee);
Philipp Reisner082a3432011-03-15 16:05:42 +01005329 if (not_empty)
5330 break;
5331 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005332 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005333 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005334 } while (not_empty);
5335
5336 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005337}
5338
/* One entry of the asender dispatch table below: the expected payload
 * size of the packet plus the handler to invoke for it. */
struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5343
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005344static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005345 [P_PING] = { 0, got_Ping },
5346 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005347 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5348 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5349 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005350 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005351 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5352 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005353 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005354 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5355 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5356 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5357 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005358 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005359 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5360 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5361 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005362};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005363
5364int drbd_asender(struct drbd_thread *thi)
5365{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005366 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005367 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005368 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005369 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005370 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005371 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005372 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005373 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005374 bool ping_timeout_active = false;
5375 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005376 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005377 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005378
Philipp Reisner3990e042013-03-27 14:08:48 +01005379 rv = sched_setscheduler(current, SCHED_RR, &param);
5380 if (rv < 0)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005381 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005382
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005383 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005384 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005385
5386 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005387 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005388 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005389 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005390 ping_int = nc->ping_int;
5391 rcu_read_unlock();
5392
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005393 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5394 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005395 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005396 goto reconnect;
5397 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005398 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005399 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005400 }
5401
Philipp Reisner32862ec2011-02-08 16:41:01 +01005402 /* TODO: conditionally cork; it may hurt latency if we cork without
5403 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005404 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005405 drbd_tcp_cork(connection->meta.socket);
5406 if (connection_finish_peer_reqs(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005407 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005408 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005409 }
5410 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005411 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005412 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005413
5414 /* short circuit, recv_msg would return EINTR anyways. */
5415 if (signal_pending(current))
5416 continue;
5417
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005418 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5419 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005420
5421 flush_signals(current);
5422
5423 /* Note:
5424 * -EINTR (on meta) we got a signal
5425 * -EAGAIN (on meta) rcvtimeo expired
5426 * -ECONNRESET other side closed the connection
5427 * -ERESTARTSYS (on data) we got a signal
5428 * rv < 0 other than above: unexpected error!
5429 * rv == expected: full header or command
5430 * rv < expected: "woken" by signal during receive
5431 * rv == 0 : "connection shut down by peer"
5432 */
5433 if (likely(rv > 0)) {
5434 received += rv;
5435 buf += rv;
5436 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005437 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005438 long t;
5439 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005440 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005441 rcu_read_unlock();
5442
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005443 t = wait_event_timeout(connection->ping_wait,
5444 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005445 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005446 if (t)
5447 break;
5448 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005449 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005450 goto reconnect;
5451 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005452 /* If the data socket received something meanwhile,
5453 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005454 if (time_after(connection->last_received,
5455 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005456 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005457 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005458 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005459 goto reconnect;
5460 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005461 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005462 continue;
5463 } else if (rv == -EINTR) {
5464 continue;
5465 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005466 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005467 goto reconnect;
5468 }
5469
5470 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005471 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005472 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005473 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005474 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005475 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005476 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005477 goto disconnect;
5478 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005479 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005480 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005481 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005482 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005483 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005484 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005485 }
5486 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005487 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005488
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005489 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005490 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005491 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005492 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005493 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005494
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005495 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005496
Philipp Reisner44ed1672011-04-19 17:10:19 +02005497 if (cmd == &asender_tbl[P_PING_ACK]) {
5498 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005499 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005500 ping_timeout_active = false;
5501 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005502
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005503 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005504 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005505 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005506 cmd = NULL;
5507 }
5508 }
5509
5510 if (0) {
5511reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005512 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5513 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005514 }
5515 if (0) {
5516disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005517 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005518 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005519 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005520
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005521 drbd_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005522
5523 return 0;
5524}