/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <linux/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (DRBD_FF_TRIM|DRBD_FF_THIN_RESYNC|DRBD_FF_WSAME)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
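/*
 * For illustration, a three-page chain as these helpers expect it, linked
 * through page->private and terminated by a 0 "next" pointer:
 *
 *	*head -> [A] -> [B] -> [C] -> 0
 *
 * page_chain_del(&head, 2) unlinks A and B, terminates the returned chain
 * at B by setting its private to 0, and leaves *head pointing at C.
 */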

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
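/*
 * Putting the two together, the intended idiom (used verbatim in
 * drbd_free_pages() below): walk to the tail outside of any lock, then
 * splice the whole chain onto the global pool under drbd_pp_lock:
 *
 *	tmp = page_chain_tail(page, &i);
 *	spin_lock(&drbd_pp_lock);
 *	page_chain_add(&drbd_pp_pool, page, tmp);
 *	drbd_pp_vacant += i;
 *	spin_unlock(&drbd_pp_lock);
 */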
154
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200155static struct page *__drbd_alloc_pages(struct drbd_device *device,
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200156 unsigned int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700157{
158 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200159 struct page *tmp = NULL;
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200160 unsigned int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700161
162 /* Yes, testing drbd_pp_vacant outside the lock is racy.
163 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200164 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700165 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200166 page = page_chain_del(&drbd_pp_pool, number);
167 if (page)
168 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700169 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200170 if (page)
171 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700172 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200173
Philipp Reisnerb411b362009-09-25 16:07:19 -0700174 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
175 * "criss-cross" setup, that might cause write-out on some other DRBD,
176 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200177 for (i = 0; i < number; i++) {
178 tmp = alloc_page(GFP_TRY);
179 if (!tmp)
180 break;
181 set_page_private(tmp, (unsigned long)page);
182 page = tmp;
183 }
184
185 if (i == number)
186 return page;
187
188 /* Not enough pages immediately available this time.
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200189 * No need to jump around here, drbd_alloc_pages will retry this
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200190 * function "soon". */
191 if (page) {
192 tmp = page_chain_tail(page, NULL);
193 spin_lock(&drbd_pp_lock);
194 page_chain_add(&drbd_pp_pool, page, tmp);
195 drbd_pp_vacant += i;
196 spin_unlock(&drbd_pp_lock);
197 }
198 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700199}
200
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200201static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200202 struct list_head *to_be_freed)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700203{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200204 struct drbd_peer_request *peer_req, *tmp;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205
206 /* The EEs are always appended to the end of the list. Since
207 they are sent in order over the wire, they have to finish
208 in order. As soon as we see the first not finished we can
209 stop to examine the list... */
210
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200211 list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200212 if (drbd_peer_req_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700213 break;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200214 list_move(&peer_req->w.list, to_be_freed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 }
216}
217
Philipp Reisner668700b2015-03-16 16:08:29 +0100218static void drbd_reclaim_net_peer_reqs(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700219{
220 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100221 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200223 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200224 reclaim_finished_net_peer_reqs(device, &reclaimed);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200225 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200226 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200227 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700228}
229
Philipp Reisner668700b2015-03-16 16:08:29 +0100230static void conn_reclaim_net_peer_reqs(struct drbd_connection *connection)
231{
232 struct drbd_peer_device *peer_device;
233 int vnr;
234
235 rcu_read_lock();
236 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
237 struct drbd_device *device = peer_device->device;
238 if (!atomic_read(&device->pp_in_use_by_net))
239 continue;
240
241 kref_get(&device->kref);
242 rcu_read_unlock();
243 drbd_reclaim_net_peer_reqs(device);
244 kref_put(&device->kref, drbd_destroy_device);
245 rcu_read_lock();
246 }
247 rcu_read_unlock();
248}
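/*
 * Note the idiom above: each device is pinned with kref_get() before the
 * RCU read lock is dropped for the actual reclaim work, and the lock is
 * re-taken before the idr iteration advances.  conn_connect() below uses
 * the same pattern around drbd_connected().
 */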

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Try to keep the fast path fast, but occasionally we need
	 * to reclaim the pages we lent to the network stack. */
	if (page && atomic_read(&device->pp_in_use_by_net) > 512)
		drbd_reclaim_net_peer_reqs(device);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_reclaim_net_peer_reqs(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
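/*
 * For illustration (this is how drbd_alloc_peer_req() below uses it): the
 * receiver allocates all backing pages for a payload in one call, blocking
 * until the pool can satisfy the request, and the pages eventually go back
 * through drbd_free_pages():
 *
 *	page = drbd_alloc_pages(peer_device, nr_pages,
 *				gfpflags_allow_blocking(gfp_mask));
 */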

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
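/*
 * For illustration: drbd_wait_ee_list_empty() further down implements the
 * split listed above, the variant without the leading underscore being a
 * plain locked wrapper:
 *
 *	spin_lock_irq(&device->resource->req_lock);
 *	_drbd_wait_ee_list_empty(device, head);
 *	spin_unlock_irq(&device->resource->req_lock);
 */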

/* normal: payload_size == request size (bi_size)
 * w_same: payload_size == logical_block_size
 * trim: payload_size == 0 */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int request_size, unsigned int payload_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (payload_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (nr_pages) {
		page = drbd_alloc_pages(peer_device, nr_pages,
					gfpflags_allow_blocking(gfp_mask));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = request_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
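/*
 * For illustration (compare drbd_try_connect() and prepare_listen_socket()
 * below): both call this right after sock_create_kern(), i.e. before
 * connect() respectively listen(), as tcp(7) demands:
 *
 *	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
 */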

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}
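/*
 * For illustration: prepare_listen_socket() below hooks this callback into
 * the listen socket, so an incoming connection completes ad->door_bell.
 * drbd_wait_for_connect() then sleeps on that completion before accepting:
 *
 *	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
 *	if (err > 0)
 *		err = kernel_accept(ad->s_listen, &s_estab, 0);
 */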

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(&init_net, ((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
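/*
 * For illustration of the handshake in conn_connect() below: the side that
 * actively connects announces the socket's role with send_first_packet()
 * (P_INITIAL_DATA for the data socket, P_INITIAL_META for the meta socket),
 * and the passive side uses the pi.cmd returned here to sort accepted
 * sockets into the right slot, even when both nodes connect simultaneously.
 */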

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock: pointer to the pointer to the socket.
 */
static bool drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
1120 }
1121 rcu_read_unlock();
1122
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001123 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1124 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1125 clear_bit(STATE_SENT, &connection->flags);
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001126 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001127 }
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001128
Philipp Reisner1c03e522015-03-16 15:01:00 +01001129 drbd_thread_start(&connection->ack_receiver);
Lars Ellenberg39e91a62015-03-24 10:40:26 +01001130 /* opencoded create_singlethread_workqueue(),
1131 * to be able to use format string arguments */
1132 connection->ack_sender =
1133 alloc_ordered_workqueue("drbd_as_%s", WQ_MEM_RECLAIM, connection->resource->name);
Philipp Reisner668700b2015-03-16 16:08:29 +01001134 if (!connection->ack_sender) {
1135 drbd_err(connection, "Failed to create workqueue ack_sender\n");
1136 return 0;
1137 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001139 mutex_lock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001140 /* The discard_my_data flag is a single-shot modifier to the next
1141 * connection attempt, the handshake of which is now well underway.
1142 * No need for rcu style copying of the whole struct
1143 * just to clear a single value. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001144 connection->net_conf->discard_my_data = 0;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001145 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001146
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001147 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001148
1149out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001150 if (ad.s_listen)
1151 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001152 if (sock.socket)
1153 sock_release(sock.socket);
1154 if (msock.socket)
1155 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156 return -1;
1157}
1158
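/* Wire header variants, distinguished below by negotiated header size and
 * magic value. A sketch only; drbd_protocol.h has the authoritative struct
 * definitions:
 *
 *   p_header80  (older protocols):  __be32 magic, __be16 command, __be16 length
 *   p_header95  (protocols 95..99): __be16 magic, __be16 command, __be32 length
 *   p_header100 (protocol >= 100):  __be32 magic, __be16 volume,
 *                                   __be16 command, __be32 length, pad
 *
 * In all cases decode_header() leaves pi->data pointing just past the header.
 */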
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001159static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001160{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001161 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001162
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001163 if (header_size == sizeof(struct p_header100) &&
1164 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1165 struct p_header100 *h = header;
1166 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001167 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001168 return -EINVAL;
1169 }
1170 pi->vnr = be16_to_cpu(h->volume);
1171 pi->cmd = be16_to_cpu(h->command);
1172 pi->size = be32_to_cpu(h->length);
1173 } else if (header_size == sizeof(struct p_header95) &&
1174 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001175 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001176 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001177 pi->size = be32_to_cpu(h->length);
1178 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001179 } else if (header_size == sizeof(struct p_header80) &&
1180 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1181 struct p_header80 *h = header;
1182 pi->cmd = be16_to_cpu(h->command);
1183 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001184 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001185 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001186 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001187 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001188 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001189 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001191 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001192 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001193}
1194
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001195static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001196{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001197 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001198 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001199
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001200 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001201 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001202 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001203
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001204 err = decode_header(connection, buffer, pi);
1205 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001207 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208}
1209
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001210/* This is blkdev_issue_flush, but asynchronous.
1211 * We want to submit to all component volumes in parallel,
1212 * then wait for all completions.
1213 */
1214struct issue_flush_context {
1215 atomic_t pending;
1216 int error;
1217 struct completion done;
1218};
1219struct one_flush_context {
1220 struct drbd_device *device;
1221 struct issue_flush_context *ctx;
1222};
1223
1224void one_flush_endio(struct bio *bio)
1225{
1226 struct one_flush_context *octx = bio->bi_private;
1227 struct drbd_device *device = octx->device;
1228 struct issue_flush_context *ctx = octx->ctx;
1229
1230 if (bio->bi_error) {
1231 ctx->error = bio->bi_error;
1232 drbd_info(device, "local disk FLUSH FAILED with status %d\n", bio->bi_error);
1233 }
1234 kfree(octx);
1235 bio_put(bio);
1236
1237 clear_bit(FLUSH_PENDING, &device->flags);
1238 put_ldev(device);
1239 kref_put(&device->kref, drbd_destroy_device);
1240
1241 if (atomic_dec_and_test(&ctx->pending))
1242 complete(&ctx->done);
1243}
1244
1245static void submit_one_flush(struct drbd_device *device, struct issue_flush_context *ctx)
1246{
1247 struct bio *bio = bio_alloc(GFP_NOIO, 0);
1248 struct one_flush_context *octx = kmalloc(sizeof(*octx), GFP_NOIO);
1249 if (!bio || !octx) {
1250 drbd_warn(device, "Could not allocate a bio, CANNOT ISSUE FLUSH\n");
1251 /* FIXME: what else can I do now? disconnecting or detaching
1252 * really does not help to improve the state of the world, either.
1253 */
1254 kfree(octx);
1255 if (bio)
1256 bio_put(bio);
1257
1258 ctx->error = -ENOMEM;
1259 put_ldev(device);
1260 kref_put(&device->kref, drbd_destroy_device);
1261 return;
1262 }
1263
1264 octx->device = device;
1265 octx->ctx = ctx;
1266 bio->bi_bdev = device->ldev->backing_bdev;
1267 bio->bi_private = octx;
1268 bio->bi_end_io = one_flush_endio;
1269 bio_set_op_attrs(bio, REQ_OP_FLUSH, WRITE_FLUSH);
1270
1271 device->flush_jif = jiffies;
1272 set_bit(FLUSH_PENDING, &device->flags);
1273 atomic_inc(&ctx->pending);
1274 submit_bio(bio);
1275}
1276
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001277static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278{
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001279 if (connection->resource->write_ordering >= WO_BDEV_FLUSH) {
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001280 struct drbd_peer_device *peer_device;
1281 struct issue_flush_context ctx;
1282 int vnr;
1283
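		/* Bias the pending count by one: it can only reach zero via
		 * the atomic_dec_and_test() after the submit loop below, so
		 * an early bio completion cannot complete ctx.done while we
		 * are still submitting. */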
1284 atomic_set(&ctx.pending, 1);
1285 ctx.error = 0;
1286 init_completion(&ctx.done);
1287
Lars Ellenberg615e0872011-11-17 14:32:12 +01001288 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001289 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1290 struct drbd_device *device = peer_device->device;
1291
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001292 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001293 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001294 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001295 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001296
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001297 submit_one_flush(device, &ctx);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001298
1299 rcu_read_lock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001301 rcu_read_unlock();
Lars Ellenbergf9ff0da2016-06-14 00:26:19 +02001302
1303 /* Do we want to add a timeout,
1304 * if disk-timeout is set? */
1305 if (!atomic_dec_and_test(&ctx.pending))
1306 wait_for_completion(&ctx.done);
1307
1308 if (ctx.error) {
1309 /* would rather check on EOPNOTSUPP, but that is not reliable.
1310 * don't try again for ANY return value != 0
1311 * if (rv == -EOPNOTSUPP) */
1312 /* Any error is already reported by bio_endio callback. */
1313 drbd_bump_write_ordering(connection->resource, NULL, WO_DRAIN_IO);
1314 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001315 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001316}
1317
1318/**
 1319 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001320 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001321 * @epoch: Epoch object.
1322 * @ev: Epoch event.
1323 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001324static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001325 struct drbd_epoch *epoch,
1326 enum epoch_event ev)
1327{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001328 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001329 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001330 enum finish_epoch rv = FE_STILL_LIVE;
1331
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001332 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001333 do {
1334 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001335
1336 epoch_size = atomic_read(&epoch->epoch_size);
1337
1338 switch (ev & ~EV_CLEANUP) {
1339 case EV_PUT:
1340 atomic_dec(&epoch->active);
1341 break;
1342 case EV_GOT_BARRIER_NR:
1343 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001344 break;
1345 case EV_BECAME_LAST:
 1346 /* nothing to do */
1347 break;
1348 }
1349
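		/* An epoch can be finished once it has seen at least one
		 * request (epoch_size != 0), none of its requests is still
		 * in flight (active == 0), and we either know its barrier
		 * number or are cleaning up. */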
Philipp Reisnerb411b362009-09-25 16:07:19 -07001350 if (epoch_size != 0 &&
1351 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001352 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001353 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001354 spin_unlock(&connection->epoch_lock);
1355 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1356 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001357 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001358#if 0
1359 /* FIXME: dec unacked on connection, once we have
1360 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001361 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001362 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001363#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001364
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001365 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001366 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1367 list_del(&epoch->list);
1368 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001369 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001370 kfree(epoch);
1371
1372 if (rv == FE_STILL_LIVE)
1373 rv = FE_DESTROYED;
1374 } else {
1375 epoch->flags = 0;
1376 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001377 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001378 if (rv == FE_STILL_LIVE)
1379 rv = FE_RECYCLED;
1380 }
1381 }
1382
1383 if (!next_epoch)
1384 break;
1385
1386 epoch = next_epoch;
1387 } while (1);
1388
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001389 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001390
Philipp Reisnerb411b362009-09-25 16:07:19 -07001391 return rv;
1392}
1393
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001394static enum write_ordering_e
1395max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1396{
1397 struct disk_conf *dc;
1398
1399 dc = rcu_dereference(bdev->disk_conf);
1400
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001401 if (wo == WO_BDEV_FLUSH && !dc->disk_flushes)
1402 wo = WO_DRAIN_IO;
1403 if (wo == WO_DRAIN_IO && !dc->disk_drain)
1404 wo = WO_NONE;
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001405
1406 return wo;
1407}
1408
Philipp Reisnerb411b362009-09-25 16:07:19 -07001409/**
 1410 * drbd_bump_write_ordering() - Fall back to another write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001411 * @resource: DRBD resource.
 * @bdev: additional backing device to take into account, may be NULL.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001412 * @wo: Write ordering method to try.
1413 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001414void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1415 enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001416{
Philipp Reisnere9526582013-11-22 15:53:41 +01001417 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001418 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001419 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001420 static char *write_ordering_str[] = {
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001421 [WO_NONE] = "none",
1422 [WO_DRAIN_IO] = "drain",
1423 [WO_BDEV_FLUSH] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001424 };
1425
Philipp Reisnere9526582013-11-22 15:53:41 +01001426 pwo = resource->write_ordering;
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001427 if (wo != WO_BDEV_FLUSH)
Lars Ellenberg70df7092013-12-20 11:17:02 +01001428 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001429 rcu_read_lock();
Philipp Reisnere9526582013-11-22 15:53:41 +01001430 idr_for_each_entry(&resource->devices, device, vnr) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001431 if (get_ldev(device)) {
1432 wo = max_allowed_wo(device->ldev, wo);
1433 if (device->ldev == bdev)
1434 bdev = NULL;
1435 put_ldev(device);
1436 }
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001437 }
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001438
1439 if (bdev)
1440 wo = max_allowed_wo(bdev, wo);
1441
Lars Ellenberg70df7092013-12-20 11:17:02 +01001442 rcu_read_unlock();
1443
Philipp Reisnere9526582013-11-22 15:53:41 +01001444 resource->write_ordering = wo;
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001445 if (pwo != resource->write_ordering || wo == WO_BDEV_FLUSH)
Philipp Reisnere9526582013-11-22 15:53:41 +01001446 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001447}
1448
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001449/*
1450 * We *may* ignore the discard-zeroes-data setting, if so configured.
1451 *
 1452 * The assumption is that "discard_zeroes_data=0" is set only because the backend
1453 * may ignore partial unaligned discards.
1454 *
1455 * LVM/DM thin as of at least
1456 * LVM version: 2.02.115(2)-RHEL7 (2015-01-28)
1457 * Library version: 1.02.93-RHEL7 (2015-01-28)
1458 * Driver version: 4.29.0
1459 * still behaves this way.
1460 *
1461 * For unaligned (wrt. alignment and granularity) or too small discards,
 1462 * we zero-out the initial and/or trailing unaligned partial chunks,
1463 * but discard all the aligned full chunks.
1464 *
1465 * At least for LVM/DM thin, the result is effectively "discard_zeroes_data=1".
1466 */
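/* Worked example (illustrative numbers, not a guarantee of any backend):
 * discard granularity 2048 sectors, alignment 0, start == 1000,
 * nr_sectors == 5144. The head [1000, 2048) is zeroed out (1048 sectors),
 * the two aligned full chunks [2048, 6144) are discarded, and no trailing
 * partial chunk remains to be zeroed. */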
1467int drbd_issue_discard_or_zero_out(struct drbd_device *device, sector_t start, unsigned int nr_sectors, bool discard)
1468{
1469 struct block_device *bdev = device->ldev->backing_bdev;
1470 struct request_queue *q = bdev_get_queue(bdev);
1471 sector_t tmp, nr;
1472 unsigned int max_discard_sectors, granularity;
1473 int alignment;
1474 int err = 0;
1475
1476 if (!discard)
1477 goto zero_out;
1478
1479 /* Zero-sector (unknown) and one-sector granularities are the same. */
1480 granularity = max(q->limits.discard_granularity >> 9, 1U);
1481 alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;
1482
1483 max_discard_sectors = min(q->limits.max_discard_sectors, (1U << 22));
1484 max_discard_sectors -= max_discard_sectors % granularity;
1485 if (unlikely(!max_discard_sectors))
1486 goto zero_out;
1487
1488 if (nr_sectors < granularity)
1489 goto zero_out;
1490
1491 tmp = start;
1492 if (sector_div(tmp, granularity) != alignment) {
1493 if (nr_sectors < 2*granularity)
1494 goto zero_out;
1495 /* start + gran - (start + gran - align) % gran */
1496 tmp = start + granularity - alignment;
1497 tmp = start + granularity - sector_div(tmp, granularity);
1498
1499 nr = tmp - start;
1500 err |= blkdev_issue_zeroout(bdev, start, nr, GFP_NOIO, 0);
1501 nr_sectors -= nr;
1502 start = tmp;
1503 }
1504 while (nr_sectors >= granularity) {
1505 nr = min_t(sector_t, nr_sectors, max_discard_sectors);
1506 err |= blkdev_issue_discard(bdev, start, nr, GFP_NOIO, 0);
1507 nr_sectors -= nr;
1508 start += nr;
1509 }
1510 zero_out:
1511 if (nr_sectors) {
1512 err |= blkdev_issue_zeroout(bdev, start, nr_sectors, GFP_NOIO, 0);
1513 }
1514 return err != 0;
1515}
1516
1517static bool can_do_reliable_discards(struct drbd_device *device)
1518{
1519 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
1520 struct disk_conf *dc;
1521 bool can_do;
1522
1523 if (!blk_queue_discard(q))
1524 return false;
1525
1526 if (q->limits.discard_zeroes_data)
1527 return true;
1528
1529 rcu_read_lock();
1530 dc = rcu_dereference(device->ldev->disk_conf);
1531 can_do = dc->discard_zeroes_if_aligned;
1532 rcu_read_unlock();
1533 return can_do;
1534}
1535
Lars Ellenberg9104d312016-06-14 00:26:31 +02001536static void drbd_issue_peer_discard(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001537{
1538 /* If the backend cannot discard, or does not guarantee
1539 * read-back zeroes in discarded ranges, we fall back to
1540 * zero-out. Unless configuration specifically requested
1541 * otherwise. */
1542 if (!can_do_reliable_discards(device))
1543 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
1544
1545 if (drbd_issue_discard_or_zero_out(device, peer_req->i.sector,
1546 peer_req->i.size >> 9, !(peer_req->flags & EE_IS_TRIM_USE_ZEROOUT)))
1547 peer_req->flags |= EE_WAS_ERROR;
1548 drbd_endio_write_sec_final(peer_req);
1549}
1550
Lars Ellenberg9104d312016-06-14 00:26:31 +02001551static void drbd_issue_peer_wsame(struct drbd_device *device,
1552 struct drbd_peer_request *peer_req)
1553{
1554 struct block_device *bdev = device->ldev->backing_bdev;
1555 sector_t s = peer_req->i.sector;
1556 sector_t nr = peer_req->i.size >> 9;
1557 if (blkdev_issue_write_same(bdev, s, nr, GFP_NOIO, peer_req->pages))
1558 peer_req->flags |= EE_WAS_ERROR;
1559 drbd_endio_write_sec_final(peer_req);
1560}
1561
1562
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001564 * drbd_submit_peer_request() - submit a peer request to the local disk
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001565 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001566 * @peer_req: peer request
Jens Axboe1eff9d32016-08-05 15:35:16 -06001567 * @op: REQ_OP_* request operation, see bio->bi_opf
 * @op_flags: additional request flags, see bio->bi_opf
 * @fault_type: DRBD_FAULT_* category for fault injection
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001568 *
1569 * May spread the pages to multiple bios,
1570 * depending on bio_add_page restrictions.
1571 *
1572 * Returns 0 if all bios have been submitted,
1573 * -ENOMEM if we could not allocate enough bios,
1574 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1575 * single page to an empty bio (which should never happen and likely indicates
1576 * that the lower level IO stack is in some way broken). This has been observed
1577 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001578 */
1579/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001580int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001581 struct drbd_peer_request *peer_req,
Mike Christiebb3cc852016-06-05 14:32:06 -05001582 const unsigned op, const unsigned op_flags,
1583 const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001584{
1585 struct bio *bios = NULL;
1586 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001587 struct page *page = peer_req->pages;
1588 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001589 unsigned data_size = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001590 unsigned n_bios = 0;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001591 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001592 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001593
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02001594 /* TRIM/DISCARD: for now, always use the helper function
1595 * blkdev_issue_zeroout(..., discard=true).
1596 * It's synchronous, but it does the right thing wrt. bio splitting.
1597 * Correctness first, performance later. Next step is to code an
1598 * asynchronous variant of the same.
1599 */
Lars Ellenberg9104d312016-06-14 00:26:31 +02001600 if (peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001601 /* wait for all pending IO completions, before we start
1602 * zeroing things out. */
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02001603 conn_wait_active_ee_empty(peer_req->peer_device->connection);
Lars Ellenberg45d29332014-04-23 12:25:23 +02001604 /* add it to the active list now,
1605 * so we can find it to present it in debugfs */
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001606 peer_req->submit_jif = jiffies;
1607 peer_req->flags |= EE_SUBMITTED;
Philipp Reisner700ca8c2016-06-14 00:26:13 +02001608
1609 /* If this was a resync request from receive_rs_deallocated(),
1610 * it is already on the sync_ee list */
1611 if (list_empty(&peer_req->w.list)) {
1612 spin_lock_irq(&device->resource->req_lock);
1613 list_add_tail(&peer_req->w.list, &device->active_ee);
1614 spin_unlock_irq(&device->resource->req_lock);
1615 }
1616
Lars Ellenberg9104d312016-06-14 00:26:31 +02001617 if (peer_req->flags & EE_IS_TRIM)
1618 drbd_issue_peer_discard(device, peer_req);
1619 else /* EE_WRITE_SAME */
1620 drbd_issue_peer_wsame(device, peer_req);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001621 return 0;
1622 }
1623
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001624 /* In most cases, we will only need one bio. But in case the lower
1625 * level restrictions happen to be different at this offset on this
1626 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001627 * request in more than one bio.
1628 *
 1629 * Plain bio_alloc is good enough here; this is not a DRBD-internally
 1630 * generated bio, but a bio allocated on behalf of the peer.
1631 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001632next_bio:
1633 bio = bio_alloc(GFP_NOIO, nr_pages);
1634 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001635 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001636 goto fail;
1637 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001638 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001639 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001640 bio->bi_bdev = device->ldev->backing_bdev;
Mike Christiebb3cc852016-06-05 14:32:06 -05001641 bio_set_op_attrs(bio, op, op_flags);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001642 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001643 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001644
1645 bio->bi_next = bios;
1646 bios = bio;
1647 ++n_bios;
1648
1649 page_chain_for_each(page) {
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001650 unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001651 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001652 /* A single page must always be possible!
 1653 * But in case it fails anyway,
1654 * we deal with it, and complain (below). */
1655 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001656 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001657 "bio_add_page failed for len=%u, "
1658 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001659 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001660 err = -ENOSPC;
1661 goto fail;
1662 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001663 goto next_bio;
1664 }
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001665 data_size -= len;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001666 sector += len >> 9;
1667 --nr_pages;
1668 }
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001669 D_ASSERT(device, data_size == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001670 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001671
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001672 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001673 /* for debugfs: update timestamp, mark as submitted */
1674 peer_req->submit_jif = jiffies;
1675 peer_req->flags |= EE_SUBMITTED;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001676 do {
1677 bio = bios;
1678 bios = bios->bi_next;
1679 bio->bi_next = NULL;
1680
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001681 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001682 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001683 return 0;
1684
1685fail:
1686 while (bios) {
1687 bio = bios;
1688 bios = bios->bi_next;
1689 bio_put(bio);
1690 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001691 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001692}
1693
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001694static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001695 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001696{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001697 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001698
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001699 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001700 drbd_clear_interval(i);
1701
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001702 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001703 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001704 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001705}
1706
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001707static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001708{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001709 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001710 int vnr;
1711
1712 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001713 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1714 struct drbd_device *device = peer_device->device;
1715
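		/* We must not sleep under rcu_read_lock(); pin the device
		 * with a kref, drop the RCU lock around the wait, and
		 * re-acquire it to continue the iteration. */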
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001716 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001717 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001718 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001719 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001720 rcu_read_lock();
1721 }
1722 rcu_read_unlock();
1723}
1724
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001725static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001727 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001728 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729 struct drbd_epoch *epoch;
1730
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001731 /* FIXME these are unacked on connection,
1732 * not a specific (peer)device.
1733 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001734 connection->current_epoch->barrier_nr = p->barrier;
1735 connection->current_epoch->connection = connection;
1736 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001737
1738 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1739 * the activity log, which means it would not be resynced in case the
1740 * R_PRIMARY crashes now.
1741 * Therefore we must send the barrier_ack after the barrier request was
1742 * completed. */
Philipp Reisnere9526582013-11-22 15:53:41 +01001743 switch (connection->resource->write_ordering) {
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001744 case WO_NONE:
Philipp Reisnerb411b362009-09-25 16:07:19 -07001745 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001746 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001747
1748 /* receiver context, in the writeout path of the other node.
1749 * avoid potential distributed deadlock */
1750 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1751 if (epoch)
1752 break;
1753 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001754 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001755 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756
Andreas Gruenbacherf6ba8632014-08-13 18:33:55 +02001757 case WO_BDEV_FLUSH:
1758 case WO_DRAIN_IO:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001759 conn_wait_active_ee_empty(connection);
1760 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001761
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001762 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001763 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1764 if (epoch)
1765 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766 }
1767
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001768 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001769 default:
Philipp Reisnere9526582013-11-22 15:53:41 +01001770 drbd_err(connection, "Strangeness in resource->write_ordering %d\n",
1771 connection->resource->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001772 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001773 }
1774
1775 epoch->flags = 0;
1776 atomic_set(&epoch->epoch_size, 0);
1777 atomic_set(&epoch->active, 0);
1778
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001779 spin_lock(&connection->epoch_lock);
1780 if (atomic_read(&connection->current_epoch->epoch_size)) {
1781 list_add(&epoch->list, &connection->current_epoch->list);
1782 connection->current_epoch = epoch;
1783 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784 } else {
1785 /* The current_epoch got recycled while we allocated this one... */
1786 kfree(epoch);
1787 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001788 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001790 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001791}
1792
Lars Ellenberg9104d312016-06-14 00:26:31 +02001793/* quick wrapper in case payload size != request_size (write same) */
1794static void drbd_csum_ee_size(struct crypto_ahash *h,
1795 struct drbd_peer_request *r, void *d,
1796 unsigned int payload_size)
1797{
1798 unsigned int tmp = r->i.size;
1799 r->i.size = payload_size;
1800 drbd_csum_ee(h, r, d);
1801 r->i.size = tmp;
1802}
1803
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804/* used from receive_RSDataReply (recv_resync_read)
Lars Ellenberg9104d312016-06-14 00:26:31 +02001805 * and from receive_Data.
1806 * data_size: actual payload ("data in")
1807 * for normal writes that is bi_size.
1808 * for discards, that is zero.
1809 * for write same, it is logical_block_size.
1810 * both trim and write same have the bi_size ("data len to be affected")
1811 * as extra argument in the packet header.
1812 */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001813static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001814read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001815 struct packet_info *pi) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001816{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001817 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001818 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001819 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 struct page *page;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001821 int digest_size, err;
1822 unsigned int data_size = pi->size, ds;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001823 void *dig_in = peer_device->connection->int_dig_in;
1824 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001825 unsigned long *data;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001826 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001827 struct p_trim *wsame = (pi->cmd == P_WSAME) ? pi->data : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001828
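	/* With peer data integrity checking enabled, the payload arrives on
	 * the wire as <digest><data>; pi->size covers both, so the digest is
	 * received first and subtracted from data_size below. */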
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001829 digest_size = 0;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001830 if (!trim && peer_device->connection->peer_integrity_tfm) {
Herbert Xu9534d672016-01-24 21:19:21 +08001831 digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001832 /*
1833 * FIXME: Receive the incoming digest into the receive buffer
1834 * here, together with its struct p_data?
1835 */
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001836 err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001837 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001838 return NULL;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001839 data_size -= digest_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001840 }
1841
Lars Ellenberg9104d312016-06-14 00:26:31 +02001842 /* assume request_size == data_size, but special case trim and wsame. */
1843 ds = data_size;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001844 if (trim) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02001845 if (!expect(data_size == 0))
1846 return NULL;
1847 ds = be32_to_cpu(trim->size);
1848 } else if (wsame) {
1849 if (data_size != queue_logical_block_size(device->rq_queue)) {
1850 drbd_err(peer_device, "data size (%u) != drbd logical block size (%u)\n",
1851 data_size, queue_logical_block_size(device->rq_queue));
1852 return NULL;
1853 }
1854 if (data_size != bdev_logical_block_size(device->ldev->backing_bdev)) {
1855 drbd_err(peer_device, "data size (%u) != backend logical block size (%u)\n",
1856 data_size, bdev_logical_block_size(device->ldev->backing_bdev));
1857 return NULL;
1858 }
1859 ds = be32_to_cpu(wsame->size);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001860 }
1861
Lars Ellenberg9104d312016-06-14 00:26:31 +02001862 if (!expect(IS_ALIGNED(ds, 512)))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001863 return NULL;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001864 if (trim || wsame) {
1865 if (!expect(ds <= (DRBD_MAX_BBIO_SECTORS << 9)))
1866 return NULL;
1867 } else if (!expect(ds <= DRBD_MAX_BIO_SIZE))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001868 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001869
Lars Ellenberg66660322010-04-06 12:15:04 +02001870 /* even though we trust our peer,
1871 * we sometimes have to double check. */
Lars Ellenberg9104d312016-06-14 00:26:31 +02001872 if (sector + (ds>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001873 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001874 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001875 (unsigned long long)capacity,
Lars Ellenberg9104d312016-06-14 00:26:31 +02001876 (unsigned long long)sector, ds);
Lars Ellenberg66660322010-04-06 12:15:04 +02001877 return NULL;
1878 }
1879
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1881 * "criss-cross" setup, that might cause write-out on some other DRBD,
1882 * which in turn might block on the other node at this very place. */
Lars Ellenberg9104d312016-06-14 00:26:31 +02001883 peer_req = drbd_alloc_peer_req(peer_device, id, sector, ds, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001884 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001885 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001886
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001887 peer_req->flags |= EE_WRITE;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001888 if (trim) {
1889 peer_req->flags |= EE_IS_TRIM;
Lars Ellenberg81a35372012-07-30 09:00:54 +02001890 return peer_req;
Lars Ellenberg9104d312016-06-14 00:26:31 +02001891 }
1892 if (wsame)
1893 peer_req->flags |= EE_WRITE_SAME;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001894
Lars Ellenberg9104d312016-06-14 00:26:31 +02001895 /* receive payload size bytes into page chain */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001896 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001897 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001898 page_chain_for_each(page) {
1899 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001900 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001901 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001902 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001903 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001904 data[0] = data[0] ^ (unsigned long)-1;
1905 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001907 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001908 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001909 return NULL;
1910 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001911 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001912 }
1913
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001914 if (digest_size) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02001915 drbd_csum_ee_size(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv, data_size);
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001916 if (memcmp(dig_in, dig_vv, digest_size)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001917 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001918 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001919 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001920 return NULL;
1921 }
1922 }
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001923 device->recv_cnt += data_size >> 9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001924 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001925}
1926
1927/* drbd_drain_block() just takes a data block
1928 * out of the socket input buffer, and discards it.
1929 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001930static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001931{
1932 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001933 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934 void *data;
1935
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001936 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001937 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001938
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001939 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001940
1941 data = kmap(page);
1942 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001943 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1944
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001945 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001946 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001947 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001948 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001949 }
1950 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001951 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001952 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001953}
1954
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001955static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001956 sector_t sector, int data_size)
1957{
Kent Overstreet79886132013-11-23 17:19:00 -08001958 struct bio_vec bvec;
1959 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960 struct bio *bio;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001961 int digest_size, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001962 void *dig_in = peer_device->connection->int_dig_in;
1963 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001964
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001965 digest_size = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001966 if (peer_device->connection->peer_integrity_tfm) {
Herbert Xu9534d672016-01-24 21:19:21 +08001967 digest_size = crypto_ahash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001968 err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001969 if (err)
1970 return err;
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001971 data_size -= digest_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972 }
1973
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974 /* optimistically update recv_cnt. if receiving fails below,
 1975 * we disconnect anyway, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001976 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001977
1978 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001979 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001980
Kent Overstreet79886132013-11-23 17:19:00 -08001981 bio_for_each_segment(bvec, bio, iter) {
1982 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1983 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001984 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001985 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001986 if (err)
1987 return err;
1988 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001989 }
1990
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001991 if (digest_size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001992 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Andreas Gruenbacher11f8b2b2014-09-11 14:29:05 +02001993 if (memcmp(dig_in, dig_vv, digest_size)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001994 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001995 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001996 }
1997 }
1998
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001999 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01002000 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001}
2002
Andreas Gruenbachera990be42011-04-06 17:56:48 +02002003/*
Philipp Reisner668700b2015-03-16 16:08:29 +01002004 * e_end_resync_block() is called in ack_sender context via
Andreas Gruenbachera990be42011-04-06 17:56:48 +02002005 * drbd_finish_peer_reqs().
2006 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002007static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002008{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01002009 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002010 container_of(w, struct drbd_peer_request, w);
2011 struct drbd_peer_device *peer_device = peer_req->peer_device;
2012 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002013 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002014 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002015
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002016 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002017
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002018 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002019 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002020 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 } else {
2022 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002023 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002025 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002027 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002028
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002029 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002030}
2031
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002032static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002033 struct packet_info *pi) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002034{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002035 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002036 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002037
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002038 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002039 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002040 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002041
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002042 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002043
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002044 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002045 /* corresponding dec_unacked() in e_end_resync_block()
2046 * respective _drbd_clear_done_ee */
2047
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002048 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002049 peer_req->submit_jif = jiffies;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002050
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002051 spin_lock_irq(&device->resource->req_lock);
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02002052 list_add_tail(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002053 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002054
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002055 atomic_add(pi->size >> 9, &device->rs_sect_ev);
Mike Christiebb3cc852016-06-05 14:32:06 -05002056 if (drbd_submit_peer_request(device, peer_req, REQ_OP_WRITE, 0,
2057 DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01002058 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002060 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002061 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002062 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002063 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002064 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002065
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002066 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002067fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002068 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01002069 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002070}
2071
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01002072static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002073find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01002074 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075{
2076 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01002077
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01002078 /* Request object according to our peer */
2079 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01002080 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01002081 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01002082 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002083 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01002084 (unsigned long)id, (unsigned long long)sector);
2085 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01002086 return NULL;
2087}
2088
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002089static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002090{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002091 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002092 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002093 struct drbd_request *req;
2094 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002095 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002096 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002097
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002098 peer_device = conn_peer_device(connection, pi->vnr);
2099 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002100 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002101 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002102
2103 sector = be64_to_cpu(p->sector);
2104
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002105 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002106 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002107 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01002108 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002109 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002110
Bart Van Assche24c48302011-05-21 18:32:29 +02002111 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07002112 * special casing it there for the various failure cases.
2113 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002114 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002115 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01002116 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002117 /* else: nothing. handled from drbd_disconnect...
2118 * I don't think we may complete this just yet
2119 * in case we are "on-disconnect: freeze" */
2120
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002121 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122}
2123
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002124static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002125{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002126 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002127 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002128 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002129 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002130 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002131
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002132 peer_device = conn_peer_device(connection, pi->vnr);
2133 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002134 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002135 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002136
2137 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002138 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002139
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002140 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002141 /* data is submitted to disk within recv_resync_read.
2142 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002143 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002144 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002145 } else {
2146 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002147 drbd_err(device, "Cannot write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002148
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002149 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002150
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002151 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002152 }
2153
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002154 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02002155
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002156 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002157}
2158
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002159static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002160 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002161{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002162 struct drbd_interval *i;
2163 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002164
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002165 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002166 if (!i->local)
2167 continue;
2168 req = container_of(i, struct drbd_request, i);
2169 if (req->rq_state & RQ_LOCAL_PENDING ||
2170 !(req->rq_state & RQ_POSTPONED))
2171 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01002172 /* as it is RQ_POSTPONED, this will cause it to
2173 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002174 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002175 }
2176}
2177
Andreas Gruenbachera990be42011-04-06 17:56:48 +02002178/*
Philipp Reisner668700b2015-03-16 16:08:29 +01002179 * e_end_block() is called in ack_sender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07002180 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002181static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002182{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01002183 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002184 container_of(w, struct drbd_peer_request, w);
2185 struct drbd_peer_device *peer_device = peer_req->peer_device;
2186 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002187 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002188 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002189
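	/* While this node is sync target (or paused sync target) and the
	 * write may set the block in sync, it is acked with P_RS_WRITE_ACK
	 * instead of P_WRITE_ACK and marked in sync locally below. */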
Philipp Reisner303d1442011-04-13 16:24:47 -07002190 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002191 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002192 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
2193 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002194 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002196 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002197 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002198 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002199 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002200 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002201 /* we expect it to be marked out of sync anyways...
2202 * maybe assert this? */
2203 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002204 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205 }
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002206
Philipp Reisnerb411b362009-09-25 16:07:19 -07002207 /* we delete from the conflict detection hash _after_ we sent out the
2208 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02002209 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002210 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002211 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002212 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002213 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002214 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002215 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01002216 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002217 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002218
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02002219 drbd_may_finish_epoch(peer_device->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002221 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002222}
2223
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002224static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01002226 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002227 container_of(w, struct drbd_peer_request, w);
2228 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002229 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002231 err = drbd_send_ack(peer_device, ack, peer_req);
2232 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002233
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002234 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002235}
2236
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002237static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002238{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002239 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002240}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002241
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01002242static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002243{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002244 struct drbd_peer_request *peer_req =
2245 container_of(w, struct drbd_peer_request, w);
2246 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002247
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002248 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002249 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002250}
2251
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002252static bool seq_greater(u32 a, u32 b)
2253{
2254 /*
2255 * We assume 32-bit wrap-around here.
2256 * For 24-bit wrap-around, we would have to shift:
2257 * a <<= 8; b <<= 8;
2258 */
2259 return (s32)a - (s32)b > 0;
2260}
2261
2262static u32 seq_max(u32 a, u32 b)
2263{
2264 return seq_greater(a, b) ? a : b;
2265}
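
/*
 * Illustrative sketch, not part of the driver: worked examples of how
 * seq_greater() above behaves across the 32-bit wrap.  It uses only the
 * helper defined above; the function name is made up for illustration.
 */
static bool __maybe_unused seq_greater_examples_hold(void)
{
	return  seq_greater(2, 1) &&		/* ordinary case */
		seq_greater(1, 0xffffffffU) &&	/* 1 is "after" the wrap */
		!seq_greater(0xffffffffU, 1);	/* but not the other way */
}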
2266
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002267static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002268{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002269 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002270 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002271
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002272 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002273 spin_lock(&device->peer_seq_lock);
2274 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2275 device->peer_seq = newest_peer_seq;
2276 spin_unlock(&device->peer_seq_lock);
2277 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002278 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002279 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002280 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002281}
2282
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002283static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2284{
2285 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2286}
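
/*
 * Worked example: overlaps() takes sector offsets but byte lengths
 * (l >> 9 converts bytes to 512-byte sectors).  s1 = 8, l1 = 4096 covers
 * sectors [8, 16); s2 = 12, l2 = 1024 covers [12, 14); neither interval
 * ends before the other starts, so overlaps(8, 4096, 12, 1024) != 0.
 */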
2287
2288/* maybe change sync_ee into an interval tree as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002289static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002290{
2291 struct drbd_peer_request *rs_req;
Fabian Frederick7e5fec32016-06-14 00:26:35 +02002292 bool rv = false;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002293
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002294 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002295 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002296 if (overlaps(peer_req->i.sector, peer_req->i.size,
2297 rs_req->i.sector, rs_req->i.size)) {
Fabian Frederick7e5fec32016-06-14 00:26:35 +02002298 rv = true;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002299 break;
2300 }
2301 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002302 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002303
2304 return rv;
2305}
2306
Philipp Reisnerb411b362009-09-25 16:07:19 -07002307/* Called from receive_Data.
2308 * Synchronize packets on sock with packets on msock.
2309 *
2310 * This is here so that even when a P_DATA packet traveling via sock
2311 * overtakes an Ack packet traveling on msock, they are still processed
2312 * in the order they were sent.
2313 *
2314 * Note: we don't care for Ack packets overtaking P_DATA packets.
2315 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002316 * In case peer_seq is larger than device->peer_seq, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002318 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002319 * ourselves. Correctly handles 32bit wrap around.
2320 *
2321 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2322 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2323 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2324 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2325 *
2326 * returns 0 if we may process the packet,
2327 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002328static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002330 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002331 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002332 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002333 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002334
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002335 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002336 return 0;
2337
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002338 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002339 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002340 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2341 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002342 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002343 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002344
Philipp Reisnerb411b362009-09-25 16:07:19 -07002345 if (signal_pending(current)) {
2346 ret = -ERESTARTSYS;
2347 break;
2348 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002349
2350 rcu_read_lock();
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02002351 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002352 rcu_read_unlock();
2353
2354 if (!tp)
2355 break;
2356
2357 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002358 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2359 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002360 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002361 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002362 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002363 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002364 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002365 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002366 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002367 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002368 break;
2369 }
2370 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002371 spin_unlock(&device->peer_seq_lock);
2372 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002373 return ret;
2374}
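
/*
 * Worked example for the wait condition above: with device->peer_seq == 5,
 * a packet with peer_seq == 6 passes (!seq_greater(5, 5)) and bumps
 * peer_seq to 6; peer_seq == 7 blocks (seq_greater(6, 5)), because packet 6
 * is still in flight on the msock; peer_seq <= 5 is old or duplicate and
 * passes without waiting.
 */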
2375
Lars Ellenberg688593c2010-11-17 22:25:03 +01002376/* see also bio_flags_to_wire():
2377 * we need to semantically map bio op/flags to data packet (DP_*) flags
2378 * and back, because we may replicate to/from other kernel versions. */
Mike Christiebb3cc852016-06-05 14:32:06 -05002379static unsigned long wire_flags_to_bio_flags(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002380{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002381 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2382 (dpf & DP_FUA ? REQ_FUA : 0) |
Mike Christie28a8f0d2016-06-05 14:32:25 -05002383 (dpf & DP_FLUSH ? REQ_PREFLUSH : 0);
Mike Christiebb3cc852016-06-05 14:32:06 -05002384}
2385
2386static unsigned long wire_flags_to_bio_op(u32 dpf)
2387{
2388 if (dpf & DP_DISCARD)
2389 return REQ_OP_DISCARD;
2390 else
2391 return REQ_OP_WRITE;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002392}
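
/*
 * Illustrative sketch, not part of the driver: the sender-side inverse of
 * the two helpers above, a hedged approximation of bio_flags_to_wire() in
 * drbd_main.c (the real helper also handles WRITE_SAME and pre-95
 * protocol versions).
 */
static u32 __maybe_unused sketch_bio_flags_to_wire(int op, unsigned long op_flags)
{
	return (op_flags & REQ_SYNC ? DP_RW_SYNC : 0) |
	       (op_flags & REQ_FUA ? DP_FUA : 0) |
	       (op_flags & REQ_PREFLUSH ? DP_FLUSH : 0) |
	       (op == REQ_OP_DISCARD ? DP_DISCARD : 0);
}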
2393
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002394static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002395 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002396{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002397 struct drbd_interval *i;
2398
2399 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002400 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002401 struct drbd_request *req;
2402 struct bio_and_error m;
2403
2404 if (!i->local)
2405 continue;
2406 req = container_of(i, struct drbd_request, i);
2407 if (!(req->rq_state & RQ_POSTPONED))
2408 continue;
2409 req->rq_state &= ~RQ_POSTPONED;
2410 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002411 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002412 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002413 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002414 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002415 goto repeat;
2416 }
2417}
2418
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002419static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002420 struct drbd_peer_request *peer_req)
2421{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002422 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002423 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002424 sector_t sector = peer_req->i.sector;
2425 const unsigned int size = peer_req->i.size;
2426 struct drbd_interval *i;
2427 bool equal;
2428 int err;
2429
2430 /*
2431 * Inserting the peer request into the write_requests tree will prevent
2432 * new conflicting local requests from being added.
2433 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002434 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002435
2436 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002437 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002438 if (i == &peer_req->i)
2439 continue;
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002440 if (i->completed)
2441 continue;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002442
2443 if (!i->local) {
2444 /*
2445 * Our peer has sent a conflicting remote request; this
2446 * should not happen in a two-node setup. Wait for the
2447 * earlier peer request to complete.
2448 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002449 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002450 if (err)
2451 goto out;
2452 goto repeat;
2453 }
2454
2455 equal = i->sector == sector && i->size == size;
2456 if (resolve_conflicts) {
2457 /*
2458 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002459 * overlapping request, it can be considered overwritten
2460 * and thus superseded; otherwise, it will be retried
2461 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002462 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002463 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002464 (i->size >> 9) >= sector + (size >> 9);
2465
2466 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002467 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002468 "local=%llus +%u, remote=%llus +%u, "
2469 "assuming %s came first\n",
2470 (unsigned long long)i->sector, i->size,
2471 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002472 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002473
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002474 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002475 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002476 list_add_tail(&peer_req->w.list, &device->done_ee);
Philipp Reisner668700b2015-03-16 16:08:29 +01002477 queue_work(connection->ack_sender, &peer_req->peer_device->send_acks_work);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002478
2479 err = -ENOENT;
2480 goto out;
2481 } else {
2482 struct drbd_request *req =
2483 container_of(i, struct drbd_request, i);
2484
2485 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002486 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002487 "local=%llus +%u, remote=%llus +%u\n",
2488 (unsigned long long)i->sector, i->size,
2489 (unsigned long long)sector, size);
2490
2491 if (req->rq_state & RQ_LOCAL_PENDING ||
2492 !(req->rq_state & RQ_POSTPONED)) {
2493 /*
2494 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002495 * decide if this request has been superseded
2496 * or needs to be retried.
2497 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002498 * disappear from the write_requests tree.
2499 *
2500 * In addition, wait for the conflicting
2501 * request to finish locally before submitting
2502 * the conflicting peer request.
2503 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002504 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002505 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002506 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002507 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002508 goto out;
2509 }
2510 goto repeat;
2511 }
2512 /*
2513 * Remember to restart the conflicting requests after
2514 * the new peer request has completed.
2515 */
2516 peer_req->flags |= EE_RESTART_REQUESTS;
2517 }
2518 }
2519 err = 0;
2520
2521 out:
2522 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002523 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002524 return err;
2525}
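
/*
 * Worked example for the containment test above (sizes are in bytes):
 * a local request i covering sectors [0, 16) (sector 0, size 8192) fully
 * contains a peer write covering [4, 8) (sector 4, size 2048), so the
 * peer write is superseded and acked with P_SUPERSEDED; had it poked out
 * on either side, it would be retried instead.
 */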
2526
Philipp Reisnerb411b362009-09-25 16:07:19 -07002527/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002528static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002529{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002530 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002531 struct drbd_device *device;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002532 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002533 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002534 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002535 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002536 u32 peer_seq = be32_to_cpu(p->seq_num);
Mike Christiebb3cc852016-06-05 14:32:06 -05002537 int op, op_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002538 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002539 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002540
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002541 peer_device = conn_peer_device(connection, pi->vnr);
2542 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002543 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002544 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002545
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002546 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002547 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002548
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002549 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2550 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002551 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002552 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002553 if (!err)
2554 err = err2;
2555 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002556 }
2557
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002558 /*
2559 * Corresponding put_ldev done either below (on various errors), or in
2560 * drbd_peer_request_endio, if we successfully submit the data at the
2561 * end of this function.
2562 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563
2564 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002565 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002566 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002567 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002568 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002569 }
2570
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002571 peer_req->w.cb = e_end_block;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002572 peer_req->submit_jif = jiffies;
2573 peer_req->flags |= EE_APPLICATION;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002574
Lars Ellenberg688593c2010-11-17 22:25:03 +01002575 dp_flags = be32_to_cpu(p->dp_flags);
Mike Christiebb3cc852016-06-05 14:32:06 -05002576 op = wire_flags_to_bio_op(dp_flags);
2577 op_flags = wire_flags_to_bio_flags(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002578 if (pi->cmd == P_TRIM) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002579 D_ASSERT(peer_device, peer_req->i.size > 0);
Mike Christiebb3cc852016-06-05 14:32:06 -05002580 D_ASSERT(peer_device, op == REQ_OP_DISCARD);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002581 D_ASSERT(peer_device, peer_req->pages == NULL);
2582 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002583 D_ASSERT(device, peer_req->i.size == 0);
2584 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002585 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002586
2587 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002588 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002589
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002590 spin_lock(&connection->epoch_lock);
2591 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002592 atomic_inc(&peer_req->epoch->epoch_size);
2593 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002594 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002595
Philipp Reisner302bdea2011-04-21 11:36:49 +02002596 rcu_read_lock();
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002597 nc = rcu_dereference(peer_device->connection->net_conf);
2598 tp = nc->two_primaries;
2599 if (peer_device->connection->agreed_pro_version < 100) {
2600 switch (nc->wire_protocol) {
2601 case DRBD_PROT_C:
2602 dp_flags |= DP_SEND_WRITE_ACK;
2603 break;
2604 case DRBD_PROT_B:
2605 dp_flags |= DP_SEND_RECEIVE_ACK;
2606 break;
2607 }
2608 }
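	/* Neither flag set means protocol A: no DRBD-level ack is sent for
	 * the write at all.  Peers with agreed_pro_version >= 100 encode
	 * these flags in dp_flags on the wire themselves. */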
Philipp Reisner302bdea2011-04-21 11:36:49 +02002609 rcu_read_unlock();
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002610
2611 if (dp_flags & DP_SEND_WRITE_ACK) {
2612 peer_req->flags |= EE_SEND_WRITE_ACK;
2613 inc_unacked(device);
2614 /* corresponding dec_unacked() in e_end_block()
2615 * or in _drbd_clear_done_ee, respectively */
2616 }
2617
2618 if (dp_flags & DP_SEND_RECEIVE_ACK) {
2619 /* I really don't like it that the receiver thread
2620 * sends on the msock, but anyways */
Andreas Gruenbacher5dd2ca12014-08-11 16:59:23 +02002621 drbd_send_ack(peer_device, P_RECV_ACK, peer_req);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002622 }
2623
Philipp Reisner302bdea2011-04-21 11:36:49 +02002624 if (tp) {
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002625 /* two primaries implies protocol C */
2626 D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
Philipp Reisner302bdea2011-04-21 11:36:49 +02002627 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002628 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002629 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002630 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002631 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002632 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002633 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002634 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002635 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002636 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002637 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002638 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002639 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002640 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002641 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002642 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002643 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002644 }
Lars Ellenberg9104d312016-06-14 00:26:31 +02002645 /* TRIM and WRITE_SAME are processed synchronously:
2646 * drbd_submit_peer_request() waits for all pending requests, that is,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002647 * for active_ee to become empty;
2648 * better not add ourselves to that list here. */
Lars Ellenberg9104d312016-06-14 00:26:31 +02002649 if ((peer_req->flags & (EE_IS_TRIM|EE_WRITE_SAME)) == 0)
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02002650 list_add_tail(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002651 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002652
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002653 if (device->state.conn == C_SYNC_TARGET)
2654 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002655
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002656 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002657 /* In case we have the only disk of the cluster: mark the block out of
 * sync so a later resync brings the peer up to date. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002658 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002659 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002660 drbd_al_begin_io(device, &peer_req->i);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002661 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002662 }
2663
Mike Christiebb3cc852016-06-05 14:32:06 -05002664 err = drbd_submit_peer_request(device, peer_req, op, op_flags,
2665 DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002666 if (!err)
2667 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002668
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002669 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002670 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002671 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002672 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002673 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002674 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002675 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2676 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002677 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002678 }
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002679
Philipp Reisnerb411b362009-09-25 16:07:19 -07002680out_interrupted:
Fabian Frederick7e5fec32016-06-14 00:26:35 +02002681 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT | EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002682 put_ldev(device);
2683 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002684 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002685}
2686
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002687/* We may throttle resync, if the lower device seems to be busy,
2688 * and current sync rate is above c_min_rate.
2689 *
2690 * To decide whether or not the lower device is busy, we use a scheme similar
2691 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2692 * activity (more than 64 sectors) that we cannot account for with our own
2693 * resync activity, the device obviously is "busy".
2694 *
2695 * The current sync rate here is computed from only the most recent two
2696 * step marks, giving a short-time average so we can react faster.
2697 */
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002698bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2699 bool throttle_if_app_is_waiting)
Lars Ellenberge8299872014-04-28 18:43:19 +02002700{
2701 struct lc_element *tmp;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002702 bool throttle = drbd_rs_c_min_rate_throttle(device);
Lars Ellenberge8299872014-04-28 18:43:19 +02002703
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002704 if (!throttle || throttle_if_app_is_waiting)
2705 return throttle;
Lars Ellenberge8299872014-04-28 18:43:19 +02002706
2707 spin_lock_irq(&device->al_lock);
2708 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2709 if (tmp) {
2710 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2711 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2712 throttle = false;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002713 /* Do not slow down if app IO is already waiting for this extent,
2714 * and our progress is necessary for application IO to complete. */
Lars Ellenberge8299872014-04-28 18:43:19 +02002715 }
2716 spin_unlock_irq(&device->al_lock);
2717
2718 return throttle;
2719}
2720
2721bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002722{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002723 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002724 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002725 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002726 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002727
2728 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002729 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002730 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002731
2732 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002733 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002734 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002735
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002736 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2737 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002738 atomic_read(&device->rs_sect_ev);
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002739
2740 if (atomic_read(&device->ap_actlog_cnt)
Lars Ellenbergff8bd882014-11-10 17:21:12 +01002741 || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002742 unsigned long rs_left;
2743 int i;
2744
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002745 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002746
2747 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2748 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002749 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002750
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002751 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2752 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002753 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002754 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002755
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002756 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002757 if (!dt)
2758 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002759 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002760 dbdt = Bit2KB(db/dt);
2761
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002762 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002763 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002764 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002765 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002766}
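
/*
 * Illustrative sketch, not part of the driver: the short-window rate math
 * above in isolation.  Assumes the usual 4 KiB per bitmap bit, so that
 * Bit2KB() yields KiB/s here; e.g. 51200 bits cleared over 10 seconds is
 * 200 MiB / 10 s == 20480 KiB/s, throttling whenever c_min_rate is lower.
 */
static unsigned long __maybe_unused sketch_recent_sync_rate(unsigned long bits_done,
							    unsigned long dt_seconds)
{
	unsigned long dt = dt_seconds ? dt_seconds : 1;	/* as above: dt++ on 0 */

	return Bit2KB(bits_done / dt);			/* KiB per second */
}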
2767
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002768static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002769{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002770 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002771 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002772 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002773 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002774 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002776 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002777 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002778 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002779
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002780 peer_device = conn_peer_device(connection, pi->vnr);
2781 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002782 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002783 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002784 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002785
2786 sector = be64_to_cpu(p->sector);
2787 size = be32_to_cpu(p->blksize);
2788
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002789 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002790 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002792 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002793 }
2794 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002795 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002797 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002798 }
2799
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002800 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002801 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002802 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002803 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002804 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002805 break;
Philipp Reisner700ca8c2016-06-14 00:26:13 +02002806 case P_RS_THIN_REQ:
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002807 case P_RS_DATA_REQUEST:
2808 case P_CSUM_RS_REQUEST:
2809 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002810 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002811 break;
2812 case P_OV_REPLY:
2813 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002814 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002815 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002816 break;
2817 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002818 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002819 }
2820 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002821 drbd_err(device, "Cannot satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002822 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002823
Lars Ellenberga821cc42010-09-06 12:31:37 +02002824 /* drain possibly payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002825 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002826 }
2827
2828 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2829 * "criss-cross" setup, that might cause write-out on some other DRBD,
2830 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002831 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
Lars Ellenberg9104d312016-06-14 00:26:31 +02002832 size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002833 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002834 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002835 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836 }
2837
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002838 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002839 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002840 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002841 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002842 /* application IO, don't drbd_rs_begin_io */
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002843 peer_req->flags |= EE_APPLICATION;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002844 goto submit;
2845
Philipp Reisner700ca8c2016-06-14 00:26:13 +02002846 case P_RS_THIN_REQ:
2847 /* If at some point in the future we have a smart way to
2848 find out if this data block is completely deallocated,
2849 then we would do something smarter here than reading
2850 the block... */
2851 peer_req->flags |= EE_RS_THIN_REQ;
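		/* fall through */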
Philipp Reisnerb411b362009-09-25 16:07:19 -07002852 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002853 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002854 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002855 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002856 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002857 break;
2858
2859 case P_OV_REPLY:
2860 case P_CSUM_RS_REQUEST:
2861 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002862 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863 if (!di)
2864 goto out_free_e;
2865
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002866 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002867 di->digest = (((char *)di)+sizeof(struct digest_info));
2868
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002869 peer_req->digest = di;
2870 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002871
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002872 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 goto out_free_e;
2874
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002875 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002876 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002877 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002878 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002879 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01002880 /* remember to report stats in drbd_resync_finished */
2881 device->use_csums = true;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002882 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002883 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002884 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002885 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002886 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002887 /* drbd_rs_begin_io done when we sent this request,
2888 * but accounting still needs to be done. */
2889 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002890 }
2891 break;
2892
2893 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002894 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002895 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002896 unsigned long now = jiffies;
2897 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002898 device->ov_start_sector = sector;
2899 device->ov_position = sector;
2900 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2901 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002902 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002903 device->rs_mark_left[i] = device->ov_left;
2904 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002905 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002906 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002907 (unsigned long long)sector);
2908 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002909 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002910 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002911 break;
2912
Philipp Reisnerb411b362009-09-25 16:07:19 -07002913 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002914 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002915 }
2916
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002917 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2918 * wrt the receiver, but it is not as straightforward as it may seem.
2919 * Various places in the resync start and stop logic assume resync
2920 * requests are processed in order, requeuing this on the worker thread
2921 * introduces a bunch of new code for synchronization between threads.
2922 *
2923 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2924 * "forever", throttling after drbd_rs_begin_io will lock that extent
2925 * for application writes for the same time. For now, just throttle
2926 * here, where the rest of the code expects the receiver to sleep for
2927 * a while, anyways.
2928 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002930 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2931 * this defers syncer requests for some time, before letting at least
2932 * on request through. The resync controller on the receiving side
2933 * will adapt to the incoming rate accordingly.
2934 *
2935 * We cannot throttle here if remote is Primary/SyncTarget:
2936 * we would also throttle its application reads.
2937 * In that case, throttling is done on the SyncTarget only.
2938 */
Lars Ellenbergc5a2c152014-05-08 10:08:05 +02002939
2940 /* Even though this may be a resync request, we do add to "read_ee";
2941 * "sync_ee" is only used for resync WRITEs.
2942 * Add to list early, so debugfs can find this request
2943 * even if we have to sleep below. */
2944 spin_lock_irq(&device->resource->req_lock);
2945 list_add_tail(&peer_req->w.list, &device->read_ee);
2946 spin_unlock_irq(&device->resource->req_lock);
2947
Lars Ellenberg944410e2014-05-06 15:02:05 +02002948 update_receiver_timing_details(connection, drbd_rs_should_slow_down);
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002949 if (device->state.peer != R_PRIMARY
2950 && drbd_rs_should_slow_down(device, sector, false))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002951 schedule_timeout_uninterruptible(HZ/10);
Lars Ellenberg944410e2014-05-06 15:02:05 +02002952 update_receiver_timing_details(connection, drbd_rs_begin_io);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002953 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002954 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002955
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002956submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002957 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002958
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002959submit:
Lars Ellenberg944410e2014-05-06 15:02:05 +02002960 update_receiver_timing_details(connection, drbd_submit_peer_request);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002961 inc_unacked(device);
Mike Christiebb3cc852016-06-05 14:32:06 -05002962 if (drbd_submit_peer_request(device, peer_req, REQ_OP_READ, 0,
2963 fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002964 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002965
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002966 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002967 drbd_err(device, "submit failed, triggering re-connect\n");
Lars Ellenbergc5a2c152014-05-08 10:08:05 +02002968
2969out_free_e:
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002970 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002971 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002972 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002973 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2974
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002975 put_ldev(device);
2976 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002977 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002978}
2979
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002980/**
2981 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
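 *
 * Returns 1 to discard the remote node's data, -1 to discard the local
 * node's data, or -100 if no automatic decision is reached.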
2982 */
2983static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002985 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986 int self, peer, rv = -100;
2987 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002988 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002989
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002990 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2991 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002992
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002993 ch_peer = device->p_uuid[UI_SIZE];
2994 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995
Philipp Reisner44ed1672011-04-19 17:10:19 +02002996 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002997 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002998 rcu_read_unlock();
2999 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003000 case ASB_CONSENSUS:
3001 case ASB_DISCARD_SECONDARY:
3002 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02003003 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003004 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005 break;
3006 case ASB_DISCONNECT:
3007 break;
3008 case ASB_DISCARD_YOUNGER_PRI:
3009 if (self == 0 && peer == 1) {
3010 rv = -1;
3011 break;
3012 }
3013 if (self == 1 && peer == 0) {
3014 rv = 1;
3015 break;
3016 }
3017 /* Else fall through to one of the other strategies... */
3018 case ASB_DISCARD_OLDER_PRI:
3019 if (self == 0 && peer == 1) {
3020 rv = 1;
3021 break;
3022 }
3023 if (self == 1 && peer == 0) {
3024 rv = -1;
3025 break;
3026 }
3027 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003028 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003029 "Using discard-least-changes instead\n");
3030 case ASB_DISCARD_ZERO_CHG:
3031 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003032 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003033 ? -1 : 1;
3034 break;
3035 } else {
3036 if (ch_peer == 0) { rv = 1; break; }
3037 if (ch_self == 0) { rv = -1; break; }
3038 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003039 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003040 break;
3041 case ASB_DISCARD_LEAST_CHG:
3042 if (ch_self < ch_peer)
3043 rv = -1;
3044 else if (ch_self > ch_peer)
3045 rv = 1;
3046 else /* ( ch_self == ch_peer ) */
3047 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003048 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003049 ? -1 : 1;
3050 break;
3051 case ASB_DISCARD_LOCAL:
3052 rv = -1;
3053 break;
3054 case ASB_DISCARD_REMOTE:
3055 rv = 1;
3056 }
3057
3058 return rv;
3059}
3060
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003061/**
3062 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
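 *
 * Return: same convention as drbd_asb_recover_0p(); several of the
 * after-sb-1pri policies delegate to the 0p handler.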
3063 */
3064static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003066 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01003067 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003068 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003069
Philipp Reisner44ed1672011-04-19 17:10:19 +02003070 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003071 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003072 rcu_read_unlock();
3073 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003074 case ASB_DISCARD_YOUNGER_PRI:
3075 case ASB_DISCARD_OLDER_PRI:
3076 case ASB_DISCARD_LEAST_CHG:
3077 case ASB_DISCARD_LOCAL:
3078 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02003079 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003080 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 break;
3082 case ASB_DISCONNECT:
3083 break;
3084 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003085 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003086 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003087 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003088 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 rv = hg;
3090 break;
3091 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003092 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093 break;
3094 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003095 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003096 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003097 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003098 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003099 enum drbd_state_rv rv2;
3100
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3102 * we might be here in C_WF_REPORT_PARAMS which is transient.
3103 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003104 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003105 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003106 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003108 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003109 rv = hg;
3110 }
3111 } else
3112 rv = hg;
3113 }
3114
3115 return rv;
3116}
3117
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003118/**
3119 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
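 *
 * Return: same convention as drbd_asb_recover_0p().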
3120 */
3121static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003123 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01003124 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003125 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126
Philipp Reisner44ed1672011-04-19 17:10:19 +02003127 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003128 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003129 rcu_read_unlock();
3130 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131 case ASB_DISCARD_YOUNGER_PRI:
3132 case ASB_DISCARD_OLDER_PRI:
3133 case ASB_DISCARD_LEAST_CHG:
3134 case ASB_DISCARD_LOCAL:
3135 case ASB_DISCARD_REMOTE:
3136 case ASB_CONSENSUS:
3137 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02003138 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003139 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140 break;
3141 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003142 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143 break;
3144 case ASB_DISCONNECT:
3145 break;
3146 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003147 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003149 enum drbd_state_rv rv2;
3150
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
3152 * we might be here in C_WF_REPORT_PARAMS which is transient.
3153 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003154 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01003155 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003156 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003158 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159 rv = hg;
3160 }
3161 } else
3162 rv = hg;
3163 }
3164
3165 return rv;
3166}
3167
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003168static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 u64 bits, u64 flags)
3170{
3171 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003172 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173 return;
3174 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003175 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176 text,
3177 (unsigned long long)uuid[UI_CURRENT],
3178 (unsigned long long)uuid[UI_BITMAP],
3179 (unsigned long long)uuid[UI_HISTORY_START],
3180 (unsigned long long)uuid[UI_HISTORY_END],
3181 (unsigned long long)bits,
3182 (unsigned long long)flags);
3183}
3184
3185/*
3186 100 after split brain try auto recover
3187 2 C_SYNC_SOURCE set BitMap
3188 1 C_SYNC_SOURCE use BitMap
3189 0 no Sync
3190 -1 C_SYNC_TARGET use BitMap
3191 -2 C_SYNC_TARGET set BitMap
3192 -100 after split brain, disconnect
3193-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01003194-1091 requires proto 91
3195-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07003196 */
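/* Illustrative note (a sketch mirroring rule 41 below and its caller):
 * requirements beyond a bare protocol version are encoded by packing the
 * required protocol version (low byte) and feature flags (next byte)
 * into one negative value:
 *
 *	return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
 *
 * drbd_sync_handshake() unpacks this again via
 *
 *	hg = -hg;
 *	proto = hg & 0xff;
 *	fflags = (hg >> 8) & 0xff;
 */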
Lars Ellenbergf2d3d752016-06-14 00:26:32 +02003197
3198static int drbd_uuid_compare(struct drbd_device *const device, enum drbd_role const peer_role, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003199{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003200 struct drbd_peer_device *const peer_device = first_peer_device(device);
3201 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003202 u64 self, peer;
3203 int i, j;
3204
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003205 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
3206 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003207
3208 *rule_nr = 10;
3209 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
3210 return 0;
3211
3212 *rule_nr = 20;
3213 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
3214 peer != UUID_JUST_CREATED)
3215 return -2;
3216
3217 *rule_nr = 30;
3218 if (self != UUID_JUST_CREATED &&
3219 (peer == UUID_JUST_CREATED || peer == (u64)0))
3220 return 2;
3221
3222 if (self == peer) {
3223 int rct, dc; /* roles at crash time */
3224
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003225 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003227 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003228 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003229
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003230 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
3231 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003232 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003233 drbd_uuid_move_history(device);
3234 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
3235 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003236
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003237 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3238 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239 *rule_nr = 34;
3240 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003241 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003242 *rule_nr = 36;
3243 }
3244
3245 return 1;
3246 }
3247
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003248 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003249
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003250 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003251 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003252
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003253 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
3254 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003255 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003257 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
3258 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
3259 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003260
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003261 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 *rule_nr = 35;
3263 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003264 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003265 *rule_nr = 37;
3266 }
3267
3268 return -1;
3269 }
3270
3271 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003272 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
3273 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003274 /* lowest bit is set when we were primary,
3275 * next bit (weight 2) is set when peer was primary */
3276 *rule_nr = 40;
3277
Lars Ellenbergf2d3d752016-06-14 00:26:32 +02003278 /* Neither has the "crashed primary" flag set,
3279 * only a replication link hickup. */
3280 if (rct == 0)
3281 return 0;
3282
3283 /* Current UUID equal and no bitmap uuid; does not necessarily
3284 * mean this was a "simultaneous hard crash", maybe IO was
3285 * frozen, so no UUID-bump happened.
3286 * This is a protocol change, overload DRBD_FF_WSAME as flag
3287 * for "new-enough" peer DRBD version. */
3288 if (device->state.role == R_PRIMARY || peer_role == R_PRIMARY) {
3289 *rule_nr = 41;
3290 if (!(connection->agreed_features & DRBD_FF_WSAME)) {
3291 drbd_warn(peer_device, "Equivalent unrotated UUIDs, but current primary present.\n");
3292 return -(0x10000 | PRO_VERSION_MAX | (DRBD_FF_WSAME << 8));
3293 }
3294 if (device->state.role == R_PRIMARY && peer_role == R_PRIMARY) {
3295 /* At least one has the "crashed primary" bit set,
3296 * both are primary now, but neither has rotated its UUIDs?
3297 * "Can not happen." */
3298 drbd_err(peer_device, "Equivalent unrotated UUIDs, but both are primary. Can not resolve this.\n");
3299 return -100;
3300 }
3301 if (device->state.role == R_PRIMARY)
3302 return 1;
3303 return -1;
3304 }
3305
3306 /* Both are secondary.
3307 * Really looks like recovery from simultaneous hard crash.
3308 * Check which had been primary before, and arbitrate. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 switch (rct) {
Lars Ellenbergf2d3d752016-06-14 00:26:32 +02003310 case 0: /* !self_pri && !peer_pri */ return 0; /* already handled */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003311 case 1: /* self_pri && !peer_pri */ return 1;
3312 case 2: /* !self_pri && peer_pri */ return -1;
3313 case 3: /* self_pri && peer_pri */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003314 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003315 return dc ? -1 : 1;
3316 }
3317 }
3318
3319 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003320 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003321 if (self == peer)
3322 return -1;
3323
3324 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003325 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003327 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003328 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3329 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3330 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003331			/* The last P_SYNC_UUID did not get through. Undo the modifications of
 3332			   the peer's UUIDs made at the last start of a resync as sync source. */
3333
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003334 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003335 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003336
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003337 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3338 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01003339
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003340 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003341 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01003342
Philipp Reisnerb411b362009-09-25 16:07:19 -07003343 return -1;
3344 }
3345 }
3346
3347 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003348 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003349 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003350 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003351 if (self == peer)
3352 return -2;
3353 }
3354
3355 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003356 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3357 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003358 if (self == peer)
3359 return 1;
3360
3361 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003362 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003364 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003365 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3366 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3367 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003368			/* The last P_SYNC_UUID did not get through. Undo the modifications of
 3369			   our UUIDs made at the last start of a resync as sync source. */
3370
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003371 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003372 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003373
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003374 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3375 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003376
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003377 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003378 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3379 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003380
3381 return 1;
3382 }
3383 }
3384
3385
3386 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003387 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003388 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003389 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390 if (self == peer)
3391 return 2;
3392 }
3393
3394 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003395 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3396 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003397 if (self == peer && self != ((u64)0))
3398 return 100;
3399
3400 *rule_nr = 100;
3401 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003402 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003403 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003404 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003405 if (self == peer)
3406 return -100;
3407 }
3408 }
3409
3410 return -1000;
3411}
3412
3413/* drbd_sync_handshake() returns the new conn state on success, or
 3414   C_MASK (-1) on failure.
3415 */
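/* In terms of the uuid_compare() result: hg > 0 leads to C_WF_BITMAP_S
   (become sync source), hg < 0 to C_WF_BITMAP_T (become sync target),
   hg == 0 to C_CONNECTED without resync; abs(hg) >= 2 additionally
   forces a full sync by first setting all bits in the bitmap.
 */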
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003416static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3417 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003418 enum drbd_disk_state peer_disk) __must_hold(local)
3419{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003420 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003421 enum drbd_conns rv = C_MASK;
3422 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003423 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003424 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003426 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003427 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003428 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003430 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003431
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003432 spin_lock_irq(&device->ldev->md.uuid_lock);
3433 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3434 drbd_uuid_dump(device, "peer", device->p_uuid,
3435 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003436
Lars Ellenbergf2d3d752016-06-14 00:26:32 +02003437 hg = drbd_uuid_compare(device, peer_role, &rule_nr);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003438 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003439
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003440 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003441
3442 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003443 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003444 return C_MASK;
3445 }
Lars Ellenbergf2d3d752016-06-14 00:26:32 +02003446 if (hg < -0x10000) {
3447 int proto, fflags;
3448 hg = -hg;
3449 proto = hg & 0xff;
3450 fflags = (hg >> 8) & 0xff;
3451 drbd_alert(device, "To resolve this both sides have to support at least protocol %d and feature flags 0x%x\n",
3452 proto, fflags);
3453 return C_MASK;
3454 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003455 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003456 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003457 return C_MASK;
3458 }
3459
3460 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3461 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3462 int f = (hg == -100) || abs(hg) == 2;
3463 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3464 if (f)
3465 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003466 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003467 hg > 0 ? "source" : "target");
3468 }
3469
Adam Gandelman3a11a482010-04-08 16:48:23 -07003470 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003471 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003472
Philipp Reisner44ed1672011-04-19 17:10:19 +02003473 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003474 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003475
3476 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003477 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478 + (peer_role == R_PRIMARY);
3479 int forced = (hg == -100);
3480
3481 switch (pcount) {
3482 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003483 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003484 break;
3485 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003486 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003487 break;
3488 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003489 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003490 break;
3491 }
3492 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003493 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003494 "automatically solved. Sync from %s node\n",
3495 pcount, (hg < 0) ? "peer" : "this");
3496 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003497 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003498				     " UUIDs were ambiguous.\n");
3499 hg = hg*2;
3500 }
3501 }
3502 }
3503
3504 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003505 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003506 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003507 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003508 hg = 1;
3509
3510 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003511 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003512 "Sync from %s node\n",
3513 (hg < 0) ? "peer" : "this");
3514 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003515 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003516 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003517 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003518
3519 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003520 /* FIXME this log message is not correct if we end up here
3521 * after an attempted attach on a diskless node.
3522 * We just refuse to attach -- well, we drop the "connection"
3523 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003524 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003525 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003526 return C_MASK;
3527 }
3528
3529 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003530 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003531 return C_MASK;
3532 }
3533
3534 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003535 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003536 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003537 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003538 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003539 /* fall through */
3540 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003541 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003542 return C_MASK;
3543 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003544 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003545			     " assumption\n");
3546 }
3547 }
3548
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003549 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003550 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003551 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003552 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003553			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003554 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3555 abs(hg) >= 2 ? "full" : "bit-map based");
3556 return C_MASK;
3557 }
3558
Philipp Reisnerb411b362009-09-25 16:07:19 -07003559 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003560 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003561 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003562 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003563 return C_MASK;
3564 }
3565
3566 if (hg > 0) { /* become sync source. */
3567 rv = C_WF_BITMAP_S;
3568 } else if (hg < 0) { /* become sync target */
3569 rv = C_WF_BITMAP_T;
3570 } else {
3571 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003572 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003573 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003574 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003575 }
3576 }
3577
3578 return rv;
3579}
3580
Philipp Reisnerf179d762011-05-16 17:31:47 +02003581static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003582{
3583 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003584 if (peer == ASB_DISCARD_REMOTE)
3585 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586
3587 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003588 if (peer == ASB_DISCARD_LOCAL)
3589 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003590
3591 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003592 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593}
3594
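/* receive_protocol(): unless the packet is a P_PROTOCOL_UPDATE, every
 * wire setting (protocol, after-sb-* policies, two-primaries,
 * discard-my-data, integrity-alg) must match the local net_conf or we
 * disconnect.  If the peer announces a data-integrity-alg, the tfm and
 * digest buffers are allocated here and installed together with an
 * updated copy of net_conf. */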
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003595static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003596{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003597 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003598 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3599 int p_proto, p_discard_my_data, p_two_primaries, cf;
3600 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3601 char integrity_alg[SHARED_SECRET_MAX] = "";
Herbert Xu9534d672016-01-24 21:19:21 +08003602 struct crypto_ahash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003603 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604
Philipp Reisnerb411b362009-09-25 16:07:19 -07003605 p_proto = be32_to_cpu(p->protocol);
3606 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3607 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3608 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003609 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003610 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003611 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003612
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003613 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003614 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003615
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003616 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003617 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003618 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003619 if (err)
3620 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003621 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003622 }
3623
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003624 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003625 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003626
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003627 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003628 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003629
3630 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003631 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003632
3633 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003634 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003635 goto disconnect_rcu_unlock;
3636 }
3637
3638 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003639 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003640 goto disconnect_rcu_unlock;
3641 }
3642
3643 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003644 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003645 goto disconnect_rcu_unlock;
3646 }
3647
3648 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003649 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003650 goto disconnect_rcu_unlock;
3651 }
3652
3653 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003654 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003655 goto disconnect_rcu_unlock;
3656 }
3657
3658 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003659 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003660 goto disconnect_rcu_unlock;
3661 }
3662
3663 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003664 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003665 goto disconnect_rcu_unlock;
3666 }
3667
3668 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003669 }
3670
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003671 if (integrity_alg[0]) {
3672 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003674 /*
3675 * We can only change the peer data integrity algorithm
3676 * here. Changing our own data integrity algorithm
3677 * requires that we send a P_PROTOCOL_UPDATE packet at
3678 * the same time; otherwise, the peer has no way to
3679 * tell between which packets the algorithm should
3680 * change.
3681 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003682
Herbert Xu9534d672016-01-24 21:19:21 +08003683 peer_integrity_tfm = crypto_alloc_ahash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
Lars Ellenberg1b57e662016-06-14 00:26:39 +02003684 if (IS_ERR(peer_integrity_tfm)) {
3685 peer_integrity_tfm = NULL;
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003686 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003687 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003688 goto disconnect;
3689 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003690
Herbert Xu9534d672016-01-24 21:19:21 +08003691 hash_size = crypto_ahash_digestsize(peer_integrity_tfm);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003692 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3693 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3694 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003695 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003696 goto disconnect;
3697 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698 }
3699
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003700 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3701 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003702 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003703 goto disconnect;
3704 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003706 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003707 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003708 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003709 *new_net_conf = *old_net_conf;
3710
3711 new_net_conf->wire_protocol = p_proto;
3712 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3713 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3714 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3715 new_net_conf->two_primaries = p_two_primaries;
3716
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003717 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003718 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003719 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003720
Herbert Xu9534d672016-01-24 21:19:21 +08003721 crypto_free_ahash(connection->peer_integrity_tfm);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003722 kfree(connection->int_dig_in);
3723 kfree(connection->int_dig_vv);
3724 connection->peer_integrity_tfm = peer_integrity_tfm;
3725 connection->int_dig_in = int_dig_in;
3726 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003727
3728 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003729 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003730 integrity_alg[0] ? integrity_alg : "(none)");
3731
3732 synchronize_rcu();
3733 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003734 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003735
Philipp Reisner44ed1672011-04-19 17:10:19 +02003736disconnect_rcu_unlock:
3737 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003738disconnect:
Herbert Xu9534d672016-01-24 21:19:21 +08003739 crypto_free_ahash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003740 kfree(int_dig_in);
3741 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003742 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003743 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003744}
3745
3746/* helper function
3747 * input: alg name, feature name
3748 * return: NULL (alg name was "")
3749 * ERR_PTR(error) if something goes wrong
3750 * or the crypto hash ptr, if it worked out ok. */
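/* Typical caller pattern (a sketch mirroring receive_SyncParam() below);
 * the tri-state return value must be checked with IS_ERR() first, and
 * NULL simply means "no algorithm configured":
 *
 *	verify_tfm = drbd_crypto_alloc_digest_safe(device,
 *			p->verify_alg, "verify-alg");
 *	if (IS_ERR(verify_tfm)) {
 *		verify_tfm = NULL;
 *		goto disconnect;
 *	}
 */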
Herbert Xu9534d672016-01-24 21:19:21 +08003751static struct crypto_ahash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003752 const char *alg, const char *name)
3753{
Herbert Xu9534d672016-01-24 21:19:21 +08003754 struct crypto_ahash *tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003755
3756 if (!alg[0])
3757 return NULL;
3758
Herbert Xu9534d672016-01-24 21:19:21 +08003759 tfm = crypto_alloc_ahash(alg, 0, CRYPTO_ALG_ASYNC);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003761 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003762 alg, name, PTR_ERR(tfm));
3763 return tfm;
3764 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003765 return tfm;
3766}
3767
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003768static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003769{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003770 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003771 int size = pi->size;
3772
3773 while (size) {
3774 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003775 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003776 if (s <= 0) {
3777 if (s < 0)
3778 return s;
3779 break;
3780 }
3781 size -= s;
3782 }
3783 if (size)
3784 return -EIO;
3785 return 0;
3786}
3787
3788/*
3789 * config_unknown_volume - device configuration command for unknown volume
3790 *
3791 * When a device is added to an existing connection, the node on which the
3792 * device is added first will send configuration commands to its peer but the
3793 * peer will not know about the device yet. It will warn and ignore these
3794 * commands. Once the device is added on the second node, the second node will
3795 * send the same device configuration commands, but in the other direction.
3796 *
3797 * (We can also end up here if drbd is misconfigured.)
3798 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003799static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003800{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003801 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003802 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003803 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003804}
3805
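/* receive_SyncParam(): the payload layout depends on the agreed protocol
 * version (struct p_rs_param, p_rs_param_89, p_rs_param_95).  The resync
 * rate is always present; verify-alg arrives with apv >= 88, csums-alg
 * with apv >= 89, and the dynamic resync controller settings
 * (c_plan_ahead etc.) with apv >= 95.  New values are installed into
 * disk_conf and net_conf under conf_update. */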
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003806static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003807{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003808 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003809 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003810 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811 unsigned int header_size, data_size, exp_max_sz;
Herbert Xu9534d672016-01-24 21:19:21 +08003812 struct crypto_ahash *verify_tfm = NULL;
3813 struct crypto_ahash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003814 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003815 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003816 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003817 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003818 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003819 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003821 peer_device = conn_peer_device(connection, pi->vnr);
3822 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003823 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003824 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825
3826 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3827 : apv == 88 ? sizeof(struct p_rs_param)
3828 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003829 : apv <= 94 ? sizeof(struct p_rs_param_89)
3830 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003832 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003833 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003834 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003835 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836 }
3837
3838 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003839 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003840 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003841 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003842 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003843 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003844 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003845 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003846 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003847 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003848 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849 }
3850
3851 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003852 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003853 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3854
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003855 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003856 if (err)
3857 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003858
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003859 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003860 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003861 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003862 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3863 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003864 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003865 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003866 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003867 return -ENOMEM;
3868 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003869
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003870 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003871 *new_disk_conf = *old_disk_conf;
3872
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003873 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003874 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003875
3876 if (apv >= 88) {
3877 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003878 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003879 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003880				    "peer wants %u, accepting only up to %u bytes\n",
3881 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003882 err = -EIO;
3883 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003884 }
3885
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003886 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003887 if (err)
3888 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003889 /* we expect NUL terminated string */
3890 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003891 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003892 p->verify_alg[data_size-1] = 0;
3893
3894 } else /* apv >= 89 */ {
3895 /* we still expect NUL terminated strings */
3896 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003897 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3898 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003899 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3900 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3901 }
3902
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003903 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003904 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003905 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003906 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003907 goto disconnect;
3908 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003909 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003910 p->verify_alg, "verify-alg");
3911 if (IS_ERR(verify_tfm)) {
3912 verify_tfm = NULL;
3913 goto disconnect;
3914 }
3915 }
3916
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003917 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003918 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003919 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003920 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003921 goto disconnect;
3922 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003923 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924 p->csums_alg, "csums-alg");
3925 if (IS_ERR(csums_tfm)) {
3926 csums_tfm = NULL;
3927 goto disconnect;
3928 }
3929 }
3930
Philipp Reisner813472c2011-05-03 16:47:02 +02003931 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003932 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3933 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3934 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3935 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003936
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003937 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003938 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003939 new_plan = fifo_alloc(fifo_size);
3940 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003941					drbd_err(device, "kmalloc of fifo_buffer failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003942 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003943 goto disconnect;
3944 }
3945 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003946 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003947
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003948 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003949 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3950 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003951 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003952 goto disconnect;
3953 }
3954
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003955 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003956
3957 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003958 strcpy(new_net_conf->verify_alg, p->verify_alg);
3959 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Herbert Xu9534d672016-01-24 21:19:21 +08003960 crypto_free_ahash(peer_device->connection->verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003961 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003962 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003963 }
3964 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003965 strcpy(new_net_conf->csums_alg, p->csums_alg);
3966 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Herbert Xu9534d672016-01-24 21:19:21 +08003967 crypto_free_ahash(peer_device->connection->csums_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003968 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003969 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003970 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003971 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003972 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003973 }
3974
Philipp Reisner813472c2011-05-03 16:47:02 +02003975 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003976 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3977 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003978 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003979
3980 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003981 old_plan = device->rs_plan_s;
3982 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003983 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003984
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003985 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003986 synchronize_rcu();
3987 if (new_net_conf)
3988 kfree(old_net_conf);
3989 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003990 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003991
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003992 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993
Philipp Reisner813472c2011-05-03 16:47:02 +02003994reconnect:
3995 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003996 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003997 kfree(new_disk_conf);
3998 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003999 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02004000 return -EIO;
4001
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02004003 kfree(new_plan);
4004 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004005 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02004006 kfree(new_disk_conf);
4007 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004008 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 /* just for completeness: actually not needed,
4010 * as this is not reached if csums_tfm was ok. */
Herbert Xu9534d672016-01-24 21:19:21 +08004011 crypto_free_ahash(csums_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004012 /* but free the verify_tfm again, if csums_tfm did not work out */
Herbert Xu9534d672016-01-24 21:19:21 +08004013 crypto_free_ahash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004014 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004015 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004016}
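/*
 * receive_SyncParam() above updates net_conf, disk_conf and rs_plan_s with
 * the same publish-then-reclaim scheme throughout: build a new object off
 * to the side, publish it with rcu_assign_pointer(), wait out all readers
 * with synchronize_rcu(), and only then kfree() the old copy.  A minimal
 * userspace sketch of that pattern, using liburcu's default flavor instead
 * of the kernel RCU API; all demo_* names are hypothetical, and writers
 * are assumed to be serialized externally (drbd holds conf_update).
 */
#include <urcu.h>	/* liburcu: rcu_read_lock(), synchronize_rcu(), ... */
#include <stdlib.h>

struct demo_conf {
	int timeout;
};

static struct demo_conf *demo_conf_ptr;

static int demo_update_timeout(int timeout)
{
	struct demo_conf *new_conf, *old_conf;

	new_conf = malloc(sizeof(*new_conf));
	if (!new_conf)
		return -1;
	*new_conf = *demo_conf_ptr;	/* like *new_net_conf = *old_net_conf */
	new_conf->timeout = timeout;

	old_conf = demo_conf_ptr;
	rcu_assign_pointer(demo_conf_ptr, new_conf);	/* publish */
	synchronize_rcu();	/* wait until no reader can still see old_conf */
	free(old_conf);
	return 0;
}

static int demo_read_timeout(void)	/* caller did rcu_register_thread() */
{
	int t;

	rcu_read_lock();
	t = rcu_dereference(demo_conf_ptr)->timeout;
	rcu_read_unlock();
	return t;
}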
4017
Philipp Reisnerb411b362009-09-25 16:07:19 -07004018/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004019static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020 const char *s, sector_t a, sector_t b)
4021{
4022 sector_t d;
4023 if (a == 0 || b == 0)
4024 return;
4025 d = (a > b) ? (a - b) : (b - a);
4026 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004027 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004028 (unsigned long long)a, (unsigned long long)b);
4029}
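/*
 * Why "a>>3": an eighth is 12.5%, so the warning above fires as soon as
 * the absolute difference exceeds one eighth of either value.  A tiny
 * standalone check of that arithmetic, with hypothetical numbers:
 */
#include <assert.h>

static void demo_differ_considerably(void)
{
	unsigned long long a = 1000, d;	/* a>>3 == 125 */

	d = 124;			/* 12.4% of a */
	assert(!(d > (a >> 3)));	/* below the threshold: no warning */
	d = 126;			/* 12.6% of a */
	assert(d > (a >> 3));		/* above it: warn */
}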
4030
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004031static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004032{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004033 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004034 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004035 struct p_sizes *p = pi->data;
Lars Ellenberg9104d312016-06-14 00:26:31 +02004036 struct o_qlim *o = (connection->agreed_features & DRBD_FF_WSAME) ? p->qlim : NULL;
Philipp Reisnere96c9632013-06-25 16:50:07 +02004037 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01004038 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004039 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01004040 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004042 peer_device = conn_peer_device(connection, pi->vnr);
4043 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004044 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004045 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004046
Philipp Reisnerb411b362009-09-25 16:07:19 -07004047 p_size = be64_to_cpu(p->d_size);
4048 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01004049 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050
Philipp Reisnerb411b362009-09-25 16:07:19 -07004051 /* just store the peer's disk size for now.
4052 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004053 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004055 if (get_ldev(device)) {
Lars Ellenberg60bac042016-06-14 00:26:30 +02004056 sector_t new_size, cur_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004057 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004058 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004059 rcu_read_unlock();
4060
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004061 warn_if_differ_considerably(device, "lower level device sizes",
4062 p_size, drbd_get_max_capacity(device->ldev));
4063 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004064 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065
4066 /* if this is the first connect, or an otherwise expected
4067 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004068 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004069 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070
4071 /* Never shrink a device with usable data during connect.
4072 But allow online shrinking if we are connected. */
Lars Ellenberg60bac042016-06-14 00:26:30 +02004073 new_size = drbd_new_dev_size(device, device->ldev, p_usize, 0);
4074 cur_size = drbd_get_capacity(device->this_bdev);
4075 if (new_size < cur_size &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004076 device->state.disk >= D_OUTDATED &&
4077 device->state.conn < C_CONNECTED) {
Lars Ellenberg60bac042016-06-14 00:26:30 +02004078 drbd_err(device, "The peer's disk size is too small! (%llu < %llu sectors)\n",
4079 (unsigned long long)new_size, (unsigned long long)cur_size);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004080 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004081 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004082 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004083 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004084
4085 if (my_usize != p_usize) {
4086 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
4087
4088 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
4089 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004090 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004091 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004092 return -ENOMEM;
4093 }
4094
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004095 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004096 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004097 *new_disk_conf = *old_disk_conf;
4098 new_disk_conf->disk_size = p_usize;
4099
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004100 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004101 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004102 synchronize_rcu();
4103 kfree(old_disk_conf);
4104
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004105 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02004106				 (unsigned long)p_usize);
4107 }
4108
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004109 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004110 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004111
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004112 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02004113 /* Leave drbd_reconsider_queue_parameters() before drbd_determine_dev_size().
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004114 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
Lars Ellenbergdd4f6992016-06-14 00:26:20 +02004115 drbd_reconsider_queue_parameters(), we can be sure that after
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004116 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
4117
Philipp Reisnere89b5912010-03-24 17:11:33 +01004118 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004119 if (get_ldev(device)) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02004120 drbd_reconsider_queue_parameters(device, device->ldev, o);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004121 dd = drbd_determine_dev_size(device, ddsf, NULL);
4122 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02004123 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004124 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004125 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004126 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01004127 /*
4128 * I am diskless, need to accept the peer's *current* size.
 4129	 * I must NOT accept the peer's backing disk size,
4130 * it may have been larger than mine all along...
4131 *
4132 * At this point, the peer knows more about my disk, or at
4133 * least about what we last agreed upon, than myself.
4134 * So if his c_size is less than his d_size, the most likely
4135 * reason is that *my* d_size was smaller last time we checked.
4136 *
4137 * However, if he sends a zero current size,
4138 * take his (user-capped or) backing disk size anyways.
4139 */
Lars Ellenberg9104d312016-06-14 00:26:31 +02004140 drbd_reconsider_queue_parameters(device, NULL, o);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01004141 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004142 }
4143
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004144 if (get_ldev(device)) {
4145 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
4146 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004147 ldsc = 1;
4148 }
4149
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004150 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004151 }
4152
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004153 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004154 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004155 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156 /* we have different sizes, probably peer
4157 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004158 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004159 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004160 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
4161 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
4162 if (device->state.pdsk >= D_INCONSISTENT &&
4163 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01004164 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004165 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01004166 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004167 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01004168 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004169 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004170 }
4171 }
4172
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004173 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004174}
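/*
 * The two size decisions in receive_sizes() above, condensed: during the
 * initial parameter exchange both sides converge on the smaller of the
 * two non-zero user limits, while a diskless node falls back through the
 * peer's current size, its user-capped size, and its backing size, in
 * that order (the "p_csize ?: p_usize ?: p_size" GNU shorthand).  A
 * standalone sketch with hypothetical demo_* helpers:
 */
#include <stdint.h>

static uint64_t demo_min_not_zero(uint64_t a, uint64_t b)
{
	if (a == 0)
		return b;
	if (b == 0)
		return a;
	return a < b ? a : b;
}

static uint64_t demo_diskless_capacity(uint64_t p_csize, uint64_t p_usize,
				       uint64_t p_size)
{
	if (p_csize)
		return p_csize;	/* the peer's *current* size, if it sent one */
	if (p_usize)
		return p_usize;	/* else the user-configured cap */
	return p_size;		/* else the peer's backing disk size */
}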
4175
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004176static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004177{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004178 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004179 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004180 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004181 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004182 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004184 peer_device = conn_peer_device(connection, pi->vnr);
4185 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004186 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004187 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004188
Philipp Reisnerb411b362009-09-25 16:07:19 -07004189 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08004190 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004191 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08004192		return -ENOMEM;
4193 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004194
4195 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
4196 p_uuid[i] = be64_to_cpu(p->uuid[i]);
4197
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004198 kfree(device->p_uuid);
4199 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004200
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004201 if (device->state.conn < C_CONNECTED &&
4202 device->state.disk < D_INCONSISTENT &&
4203 device->state.role == R_PRIMARY &&
4204 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004205 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004206 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004207 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004208 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004209 }
4210
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004211 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004213 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004214 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004215 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216 (p_uuid[UI_FLAGS] & 8);
4217 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004218 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004219 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004220 "clear_n_write from receive_uuids",
4221 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004222 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
4223 _drbd_uuid_set(device, UI_BITMAP, 0);
4224 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004225 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004226 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004227 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004228 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004229 put_ldev(device);
4230 } else if (device->state.disk < D_INCONSISTENT &&
4231 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02004232 /* I am a diskless primary, the peer just created a new current UUID
4233 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004234 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004235 }
4236
 4237	/* Before we test the disk state, we should wait until a possibly
 4238	   ongoing cluster-wide state change has finished. That is important if
 4239	   we are primary and are detaching from our disk. We need to see the
 4240	   new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004241 mutex_lock(device->state_mutex);
4242 mutex_unlock(device->state_mutex);
4243 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
4244 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004245
4246 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004247 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004249 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004250}
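/*
 * The "Can only connect to data with current UUID" check above masks off
 * bit 0 before comparing, because drbd appears to reserve the lowest UUID
 * bit as a flag rather than as part of the data-generation identity.
 * Minimal sketch of that comparison, with hypothetical values:
 */
#include <stdbool.h>
#include <stdint.h>

static bool demo_same_data_generation(uint64_t my_ed_uuid, uint64_t peer_uuid)
{
	return (my_ed_uuid & ~UINT64_C(1)) == (peer_uuid & ~UINT64_C(1));
}
/* e.g. 0x1234 and 0x1235 differ only in bit 0: same generation */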
4251
4252/**
4253 * convert_state() - Converts the peer's view of the cluster state to our point of view
4254 * @ps: The state as seen by the peer.
4255 */
4256static union drbd_state convert_state(union drbd_state ps)
4257{
4258 union drbd_state ms;
4259
4260 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02004261 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004262 [C_CONNECTED] = C_CONNECTED,
4263
4264 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
4265 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
4266 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
4267 [C_VERIFY_S] = C_VERIFY_T,
4268 [C_MASK] = C_MASK,
4269 };
4270
4271 ms.i = ps.i;
4272
4273 ms.conn = c_tab[ps.conn];
4274 ms.peer = ps.role;
4275 ms.role = ps.peer;
4276 ms.pdsk = ps.disk;
4277 ms.disk = ps.pdsk;
4278 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
4279
4280 return ms;
4281}
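/*
 * convert_state() simply mirrors the packet: what the peer calls "role"
 * is our "peer", its "disk" is our "pdsk", and asymmetric connection
 * states are flipped pairwise (StartingSyncS <-> StartingSyncT,
 * VerifyS -> VerifyT).  A reduced sketch of the same idea, using a
 * hypothetical demo state instead of the real drbd state encoding:
 */
enum demo_role { DEMO_SECONDARY, DEMO_PRIMARY };

struct demo_state {
	enum demo_role role;	/* what the sender is */
	enum demo_role peer;	/* what the sender thinks we are */
};

static struct demo_state demo_convert_state(struct demo_state ps)
{
	struct demo_state ms;

	ms.role = ps.peer;	/* its view of us becomes our view of us */
	ms.peer = ps.role;	/* and vice versa */
	return ms;
}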
4282
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004283static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004284{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004285 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004286 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004287 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004288 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01004289 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004290
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004291 peer_device = conn_peer_device(connection, pi->vnr);
4292 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004293 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004294 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004295
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296 mask.i = be32_to_cpu(p->mask);
4297 val.i = be32_to_cpu(p->val);
4298
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004299 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004300 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004301 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004302 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303 }
4304
4305 mask = convert_state(mask);
4306 val = convert_state(val);
4307
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004308 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004309 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004311 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004312
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004313 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314}
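/*
 * The mask/val pair received above describes a partial state change:
 * mask selects which bitfields of the state word the peer wants to
 * touch, val holds their new contents.  drbd_change_state() ends up
 * applying it, roughly, as ns.i = (os.i & ~mask.i) | val.i.  Standalone
 * sketch of that bit manipulation:
 */
#include <stdint.h>

static uint32_t demo_apply_state_change(uint32_t os, uint32_t mask, uint32_t val)
{
	/* keep the unmasked fields, overwrite the masked ones */
	return (os & ~mask) | val;
}
/* e.g. demo_apply_state_change(0x00c1, 0x00f0, 0x0020) == 0x0021 */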
4315
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004316static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004317{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004318 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004319 union drbd_state mask, val;
4320 enum drbd_state_rv rv;
4321
4322 mask.i = be32_to_cpu(p->mask);
4323 val.i = be32_to_cpu(p->val);
4324
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004325 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4326 mutex_is_locked(&connection->cstate_mutex)) {
4327 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004328 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004329 }
4330
4331 mask = convert_state(mask);
4332 val = convert_state(val);
4333
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004334 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4335 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004336
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004337 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004338}
4339
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004340static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004342 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004343 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004344 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004345 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004346 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02004347 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 int rv;
4349
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004350 peer_device = conn_peer_device(connection, pi->vnr);
4351 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004352 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004353 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004354
Philipp Reisnerb411b362009-09-25 16:07:19 -07004355 peer_state.i = be32_to_cpu(p->state);
4356
4357 real_peer_disk = peer_state.disk;
4358 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004359 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004360 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361 }
4362
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004363 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004365 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004366 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367
Philipp Reisner668700b2015-03-16 16:08:29 +01004368 /* If some other part of the code (ack_receiver thread, timeout)
Lars Ellenberg545752d2011-12-05 14:39:25 +01004369 * already decided to close the connection again,
4370 * we must not "re-establish" it here. */
4371 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004372 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01004373
Lars Ellenberg40424e42011-09-26 15:24:56 +02004374 /* If this is the "end of sync" confirmation, usually the peer disk
4375 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
4376 * set) resync started in PausedSyncT, or if the timing of pause-/
4377 * unpause-sync events has been "just right", the peer disk may
4378 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
4379 */
4380 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
4381 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004382 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4383 /* If we are (becoming) SyncSource, but peer is still in sync
4384 * preparation, ignore its uptodate-ness to avoid flapping, it
4385 * will change to inconsistent once the peer reaches active
4386 * syncing states.
4387 * It may have changed syncer-paused flags, however, so we
4388 * cannot ignore this completely. */
4389 if (peer_state.conn > C_CONNECTED &&
4390 peer_state.conn < C_SYNC_SOURCE)
4391 real_peer_disk = D_INCONSISTENT;
4392
4393 /* if peer_state changes to connected at the same time,
4394 * it explicitly notifies us that it finished resync.
4395 * Maybe we should finish it up, too? */
4396 else if (os.conn >= C_SYNC_SOURCE &&
4397 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004398 if (drbd_bm_total_weight(device) <= device->rs_failed)
4399 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004400 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004401 }
4402 }
4403
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004404 /* explicit verify finished notification, stop sector reached. */
4405 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4406 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004407 ov_out_of_sync_print(device);
4408 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004409 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004410 }
4411
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004412 /* peer says his disk is inconsistent, while we think it is uptodate,
4413 * and this happens while the peer still thinks we have a sync going on,
4414 * but we think we are already done with the sync.
4415 * We ignore this to avoid flapping pdsk.
4416 * This should not happen, if the peer is a recent version of drbd. */
4417 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4418 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4419 real_peer_disk = D_UP_TO_DATE;
4420
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004421 if (ns.conn == C_WF_REPORT_PARAMS)
4422 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423
Philipp Reisner67531712010-10-27 12:21:30 +02004424 if (peer_state.conn == C_AHEAD)
4425 ns.conn = C_BEHIND;
4426
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004427 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4428 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429 int cr; /* consider resync */
4430
4431 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004432 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433 /* if we had an established connection
4434 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004435 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004437 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004438 /* if we have both been inconsistent, and the peer has been
4439 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004440 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441 /* if we had been plain connected, and the admin requested to
4442 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004443 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004444 (peer_state.conn >= C_STARTING_SYNC_S &&
4445 peer_state.conn <= C_WF_BITMAP_T));
4446
4447 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004448 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004450 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004451 if (ns.conn == C_MASK) {
4452 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004453 if (device->state.disk == D_NEGOTIATING) {
4454 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004456 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004457 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004458 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004459 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004460 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004461 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004462 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004463 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004464 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004465 }
4466 }
4467 }
4468
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004469 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004470 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004471 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004472 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004473 ns.peer = peer_state.role;
4474 ns.pdsk = real_peer_disk;
4475 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004476 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004477 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004478 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004479 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4480 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004481 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004482		   for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004483 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004484 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004485 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004486 drbd_uuid_new_current(device);
4487 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004488 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004489 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004490 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004491 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4492 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004493 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004494
4495 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004496 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004497 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004498 }
4499
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004500 if (os.conn > C_WF_REPORT_PARAMS) {
4501 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004502 peer_state.disk != D_NEGOTIATING ) {
4503 /* we want resync, peer has not yet decided to sync... */
4504 /* Nowadays only used when forcing a node into primary role and
4505 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004506 drbd_send_uuids(peer_device);
4507 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004508 }
4509 }
4510
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004511 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004512
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004513 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004514
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004515 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516}
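/*
 * The retry: loop in receive_state() is optimistic concurrency: snapshot
 * the state under the lock, evaluate without it (the handshake may
 * sleep), then re-take the lock and start over if the state moved in the
 * meantime.  A userspace sketch of the same shape, with a hypothetical
 * demo state word:
 */
#include <pthread.h>

static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int demo_state;

static unsigned int demo_compute_new_state(unsigned int old)
{
	return old + 1;		/* stand-in for the real negotiation */
}

static void demo_update_state(void)
{
	unsigned int os, ns;

	pthread_mutex_lock(&demo_lock);
retry:
	os = demo_state;
	pthread_mutex_unlock(&demo_lock);

	ns = demo_compute_new_state(os);	/* may block */

	pthread_mutex_lock(&demo_lock);
	if (os != demo_state)
		goto retry;			/* state moved underneath us */
	demo_state = ns;
	pthread_mutex_unlock(&demo_lock);
}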
4517
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004518static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004520 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004521 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004522 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004523
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004524 peer_device = conn_peer_device(connection, pi->vnr);
4525 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004526 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004527 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004528
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004529 wait_event(device->misc_wait,
4530 device->state.conn == C_WF_SYNC_UUID ||
4531 device->state.conn == C_BEHIND ||
4532 device->state.conn < C_CONNECTED ||
4533 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004534
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004535 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004536
Philipp Reisnerb411b362009-09-25 16:07:19 -07004537 /* Here the _drbd_uuid_ functions are right, current should
4538 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004539 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4540 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4541 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004542
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004543 drbd_print_uuids(device, "updated sync uuid");
4544 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004545
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004546 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004548 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004549
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004550 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004551}
4552
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004553/**
4554 * receive_bitmap_plain
4555 *
4556 * Return 0 when done, 1 when another iteration is needed, and a negative error
4557 * code upon failure.
4558 */
4559static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004560receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004561 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004562{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004563 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004564 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004565 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004566 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004567 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004568 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004569
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004570 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004571 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004572 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004573 }
4574 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004575 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004576 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004577 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004578 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004579
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004580 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004581
4582 c->word_offset += num_words;
4583 c->bit_offset = c->word_offset * BITS_PER_LONG;
4584 if (c->bit_offset > c->bm_bits)
4585 c->bit_offset = c->bm_bits;
4586
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004587 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004588}
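/*
 * receive_bitmap_plain() is plain chunking: each packet carries as many
 * whole longs as fit into one socket buffer, and the transfer context
 * advances word_offset/bit_offset until bit_offset reaches bm_bits.
 * Reduced sketch of that bookkeeping (hypothetical demo context):
 */
#include <stddef.h>

struct demo_xfer_ctx {
	size_t bm_bits, bm_words;
	size_t word_offset, bit_offset;
};

static size_t demo_next_chunk_words(const struct demo_xfer_ctx *c,
				    size_t payload_bytes)
{
	size_t max_words = payload_bytes / sizeof(unsigned long);
	size_t left = c->bm_words - c->word_offset;

	return max_words < left ? max_words : left;
}

static void demo_advance(struct demo_xfer_ctx *c, size_t num_words)
{
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * 8 * sizeof(unsigned long);
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;	/* the last word may overhang */
}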
4589
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004590static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4591{
4592 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4593}
4594
4595static int dcbp_get_start(struct p_compressed_bm *p)
4596{
4597 return (p->encoding & 0x80) != 0;
4598}
4599
4600static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4601{
4602 return (p->encoding >> 4) & 0x7;
4603}
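/*
 * The three dcbp_get_*() helpers above pick apart one header byte of a
 * compressed-bitmap packet: bits 0-3 hold the encoding, bits 4-6 the
 * number of pad bits, bit 7 the value of the first run.  Quick check
 * with a hypothetical encoding byte:
 *
 *    7   6 5 4   3 2 1 0
 *  +---+--------+--------+
 *  | S |  pad   |  code  |
 *  +---+--------+--------+
 */
#include <assert.h>

static void demo_dcbp_layout(void)
{
	unsigned char encoding = 0xa2;	/* S=1, pad=2, code=2 */

	assert((encoding & 0x0f) == 2);		/* dcbp_get_code()     */
	assert(((encoding >> 4) & 0x7) == 2);	/* dcbp_get_pad_bits() */
	assert((encoding & 0x80) != 0);		/* dcbp_get_start()    */
}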
4604
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004605/**
4606 * recv_bm_rle_bits
4607 *
4608 * Return 0 when done, 1 when another iteration is needed, and a negative error
4609 * code upon failure.
4610 */
4611static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004612recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004613 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004614 struct bm_xfer_ctx *c,
4615 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004616{
4617 struct bitstream bs;
4618 u64 look_ahead;
4619 u64 rl;
4620 u64 tmp;
4621 unsigned long s = c->bit_offset;
4622 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004623 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624 int have;
4625 int bits;
4626
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004627 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628
4629 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4630 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004631 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004632
4633 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4634 bits = vli_decode_bits(&rl, look_ahead);
4635 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004636 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637
4638 if (toggle) {
4639 e = s + rl -1;
4640 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004641 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004642 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004644 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004645 }
4646
4647 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004648 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004649 have, bits, look_ahead,
4650 (unsigned int)(bs.cur.b - p->code),
4651 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004652 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004654 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4655 if (likely(bits < 64))
4656 look_ahead >>= bits;
4657 else
4658 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004659 have -= bits;
4660
4661 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4662 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004663 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 look_ahead |= tmp << have;
4665 have += bits;
4666 }
4667
4668 c->bit_offset = s;
4669 bm_xfer_ctx_bit_to_word_offset(c);
4670
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004671 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004672}
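/*
 * Stripped of the VLI bit-stream handling, recv_bm_rle_bits() is toggle
 * run-length decoding: the start flag says whether the first run is of
 * set bits, and every following run flips the polarity.  A sketch that
 * operates on an array of already-decoded run lengths (all demo_* names
 * hypothetical; runs are >= 1, as the VLI code guarantees):
 */
#include <stdbool.h>
#include <stddef.h>

static void demo_set_bits(unsigned char *bm, size_t s, size_t e)
{
	for (; s <= e; s++)
		bm[s / 8] |= 1u << (s % 8);
}

static void demo_rle_decode(unsigned char *bm, const size_t *rl,
			    size_t nruns, bool first_is_set)
{
	bool toggle = first_is_set;
	size_t s = 0, i;

	for (i = 0; i < nruns; s += rl[i], i++, toggle = !toggle) {
		if (toggle)
			demo_set_bits(bm, s, s + rl[i] - 1);
	}
}
/* runs {3, 5, 2} with first_is_set == false set bits 3..7: byte 0 == 0xf8 */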
4673
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004674/**
4675 * decode_bitmap_c
4676 *
4677 * Return 0 when done, 1 when another iteration is needed, and a negative error
4678 * code upon failure.
4679 */
4680static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004681decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004682 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004683 struct bm_xfer_ctx *c,
4684 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004686 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004687 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688
4689 /* other variants had been implemented for evaluation,
4690 * but have been dropped as this one turned out to be "best"
4691 * during all our tests. */
4692
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004693 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4694 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004695 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004696}
4697
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004698void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004699 const char *direction, struct bm_xfer_ctx *c)
4700{
4701 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004702 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004703 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4704 unsigned int plain =
4705 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4706 c->bm_words * sizeof(unsigned long);
4707 unsigned int total = c->bytes[0] + c->bytes[1];
4708 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709
 4710	/* total cannot be zero, but just in case: */
4711 if (total == 0)
4712 return;
4713
4714 /* don't report if not compressed */
4715 if (total >= plain)
4716 return;
4717
4718 /* total < plain. check for overflow, still */
4719 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4720 : (1000 * total / plain);
4721
4722 if (r > 1000)
4723 r = 1000;
4724
4725 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004726 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004727 "total %u; compression: %u.%u%%\n",
4728 direction,
4729 c->bytes[1], c->packets[1],
4730 c->bytes[0], c->packets[0],
4731 total, r/10, r % 10);
4732}
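/*
 * The savings figure above is computed in integer permille, and the
 * ternary guards "1000 * total" against unsigned overflow by dividing
 * the other operand first once total is large.  Standalone version of
 * the same arithmetic, with hypothetical byte counts:
 */
#include <limits.h>
#include <stdio.h>

static void demo_compression_stats(unsigned int plain, unsigned int total)
{
	unsigned int r;

	if (total == 0 || total >= plain)
		return;		/* nothing saved, nothing to report */

	r = (total > UINT_MAX / 1000) ? (total / (plain / 1000))
				      : (1000 * total / plain);
	if (r > 1000)
		r = 1000;
	r = 1000 - r;
	printf("compression: %u.%u%%\n", r / 10, r % 10);
}
/* demo_compression_stats(100000, 1500) prints "compression: 98.5%" */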
4733
4734/* Since we are processing the bitfield from lower addresses to higher,
4735 it does not matter if the process it in 32 bit chunks or 64 bit
4736 chunks as long as it is little endian. (Understand it as byte stream,
4737 beginning with the lowest byte...) If we would use big endian
4738 we would need to process it from the highest address to the lowest,
4739 in order to be agnostic to the 32 vs 64 bits issue.
4740
4741 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004742static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004743{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004744 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004745 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004746 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004747 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004748
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004749 peer_device = conn_peer_device(connection, pi->vnr);
4750 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004751 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004752 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004753
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004754 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004755 /* you are supposed to send additional out-of-sync information
4756 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004757
Philipp Reisnerb411b362009-09-25 16:07:19 -07004758 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004759 .bm_bits = drbd_bm_bits(device),
4760 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004761 };
4762
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004763 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004764 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004765 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004766 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004767 /* MAYBE: sanity check that we speak proto >= 90,
4768 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004769 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004770
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004771 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004772 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004773 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004774 goto out;
4775 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004776 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004777 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004778 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004779 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004780 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004781 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004782 if (err)
4783 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004784 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004785 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004786			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)\n", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004787 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004788 goto out;
4789 }
4790
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004791 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004792 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004793
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004794 if (err <= 0) {
4795 if (err < 0)
4796 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004797 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004798 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004799 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004800 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004801 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004802 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004803
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004804 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004805
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004806 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004807 enum drbd_state_rv rv;
4808
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004809 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004810 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004811 goto out;
4812 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004813 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004814 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004815 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004816 /* admin may have requested C_DISCONNECTING,
4817 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004818 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004819 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004820 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004821 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004822
Philipp Reisnerb411b362009-09-25 16:07:19 -07004823 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004824 drbd_bm_unlock(device);
4825 if (!err && device->state.conn == C_WF_BITMAP_S)
4826 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004827 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004828}
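/*
 * The byte-stream remark in the comment before receive_bitmap() can be
 * checked directly: on a little-endian host, bit i of the bitmap lives
 * in byte i/8 of the raw stream no matter whether the stream is viewed
 * as 32 bit or 64 bit words, so the receiver may merge whatever word
 * size it likes.  Sketch (assumes it runs on a little-endian machine):
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

static void demo_le_is_chunk_agnostic(const unsigned char stream[16])
{
	uint32_t w32[4];
	uint64_t w64[2];
	unsigned int i;

	memcpy(w32, stream, 16);
	memcpy(w64, stream, 16);

	for (i = 0; i < 128; i++) {
		int via_bytes = (stream[i / 8] >> (i % 8)) & 1;
		int via_32 = (int)(w32[i / 32] >> (i % 32)) & 1;
		int via_64 = (int)(w64[i / 64] >> (i % 64)) & 1;

		assert(via_bytes == via_32 && via_32 == via_64);
	}
}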
4829
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004830static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004831{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004832 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004833 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004834
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004835 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004836}
4837
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004838static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004839{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004840 /* Make sure we've acked all the TCP data associated
4841 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004842 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004843
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004844 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004845}
4846
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004847static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004848{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004849 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004850 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004851 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004852
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004853 peer_device = conn_peer_device(connection, pi->vnr);
4854 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004855 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004856 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004857
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004858 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004859 case C_WF_SYNC_UUID:
4860 case C_WF_BITMAP_T:
4861 case C_BEHIND:
4862 break;
4863 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004864 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004865 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004866 }
4867
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004868 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004869
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004870 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004871}
4872
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004873static int receive_rs_deallocated(struct drbd_connection *connection, struct packet_info *pi)
4874{
4875 struct drbd_peer_device *peer_device;
4876 struct p_block_desc *p = pi->data;
4877 struct drbd_device *device;
4878 sector_t sector;
4879 int size, err = 0;
4880
4881 peer_device = conn_peer_device(connection, pi->vnr);
4882 if (!peer_device)
4883 return -EIO;
4884 device = peer_device->device;
4885
4886 sector = be64_to_cpu(p->sector);
4887 size = be32_to_cpu(p->blksize);
4888
4889 dec_rs_pending(device);
4890
4891 if (get_ldev(device)) {
4892 struct drbd_peer_request *peer_req;
4893 const int op = REQ_OP_DISCARD;
4894
4895 peer_req = drbd_alloc_peer_req(peer_device, ID_SYNCER, sector,
Lars Ellenberg9104d312016-06-14 00:26:31 +02004896 size, 0, GFP_NOIO);
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004897 if (!peer_req) {
4898 put_ldev(device);
4899 return -ENOMEM;
4900 }
4901
4902 peer_req->w.cb = e_end_resync_block;
4903 peer_req->submit_jif = jiffies;
4904 peer_req->flags |= EE_IS_TRIM;
4905
4906 spin_lock_irq(&device->resource->req_lock);
4907 list_add_tail(&peer_req->w.list, &device->sync_ee);
4908 spin_unlock_irq(&device->resource->req_lock);
4909
4910 atomic_add(pi->size >> 9, &device->rs_sect_ev);
4911 err = drbd_submit_peer_request(device, peer_req, op, 0, DRBD_FAULT_RS_WR);
4912
4913 if (err) {
4914 spin_lock_irq(&device->resource->req_lock);
4915 list_del(&peer_req->w.list);
4916 spin_unlock_irq(&device->resource->req_lock);
4917
4918 drbd_free_peer_req(device, peer_req);
4919 put_ldev(device);
4920 err = 0;
4921 goto fail;
4922 }
4923
4924 inc_unacked(device);
4925
4926 /* No put_ldev() here. Gets called in drbd_endio_write_sec_final(),
4927 as well as drbd_rs_complete_io() */
4928 } else {
4929 fail:
4930 drbd_rs_complete_io(device, sector);
4931 drbd_send_ack_ex(peer_device, P_NEG_ACK, sector, size, ID_SYNCER);
4932 }
4933
4934 atomic_add(size >> 9, &device->rs_sect_in);
4935
4936 return err;
4937}
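/*
 * Both resync counters touched above (rs_sect_ev, rs_sect_in) count
 * 512-byte sectors, hence the ">> 9" whenever a byte count is added
 * (2^9 == 512).  Trivial standalone check:
 */
#include <assert.h>

static unsigned long demo_bytes_to_sectors(unsigned long bytes)
{
	return bytes >> 9;	/* bytes -> 512-byte sectors */
}
/* demo_bytes_to_sectors(4096) == 8 */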
4938
Philipp Reisner02918be2010-08-20 14:35:10 +02004939struct data_cmd {
4940 int expect_payload;
Lars Ellenberg9104d312016-06-14 00:26:31 +02004941 unsigned int pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004942 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004943};
4944
Philipp Reisner02918be2010-08-20 14:35:10 +02004945static struct data_cmd drbd_cmd_handler[] = {
4946 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4947 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4948 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply },
4949 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004950 [P_BITMAP] = { 1, 0, receive_bitmap },
4951 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap },
4952 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004953 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4954 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004955 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4956 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004957 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4958 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4959 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4960 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4961 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4962 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4963 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4964 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4965 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004966 [P_RS_THIN_REQ] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Philipp Reisner02918be2010-08-20 14:35:10 +02004967 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004968 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004969 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004970 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004971 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner700ca8c2016-06-14 00:26:13 +02004972 [P_RS_DEALLOCATED] = { 0, sizeof(struct p_block_desc), receive_rs_deallocated },
Lars Ellenberg9104d312016-06-14 00:26:31 +02004973 [P_WSAME] = { 1, sizeof(struct p_wsame), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004974};
4975
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004976static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004977{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004978 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004979 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004980 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004981
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004982 while (get_t_state(&connection->receiver) == RUNNING) {
Lars Ellenberg9104d312016-06-14 00:26:31 +02004983 struct data_cmd const *cmd;
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004984
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004985 drbd_thread_current_set_cpu(&connection->receiver);
Lars Ellenberg944410e2014-05-06 15:02:05 +02004986 update_receiver_timing_details(connection, drbd_recv_header);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004987 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004988 goto err_out;
4989
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004990 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004991 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004992 drbd_err(connection, "Unexpected data packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004993 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004994 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004995 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004996
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004997 shs = cmd->pkt_size;
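 /* with the WSAME feature agreed, P_SIZES grew a trailing
 * struct o_qlim (the peer's queue limits) */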
Lars Ellenberg9104d312016-06-14 00:26:31 +02004998 if (pi.cmd == P_SIZES && connection->agreed_features & DRBD_FF_WSAME)
4999 shs += sizeof(struct o_qlim);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005000 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005001 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005002 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02005003 goto err_out;
5004 }
Lars Ellenberg9104d312016-06-14 00:26:31 +02005005 if (pi.size < shs) {
5006 drbd_err(connection, "%s: unexpected packet size, expected:%d received:%d\n",
5007 cmdname(pi.cmd), (int)shs, pi.size);
5008 goto err_out;
5009 }
Philipp Reisner02918be2010-08-20 14:35:10 +02005010
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02005011 if (shs) {
Lars Ellenberg944410e2014-05-06 15:02:05 +02005012 update_receiver_timing_details(connection, drbd_recv_all_warn);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005013 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005014 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02005015 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01005016 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02005017 }
5018
Lars Ellenberg944410e2014-05-06 15:02:05 +02005019 update_receiver_timing_details(connection, cmd->fn);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005020 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01005021 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005022 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005023 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02005024 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005025 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005026 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01005027 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005028
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01005029 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005030 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005031}
5032
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005033static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02005034{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005035 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005036 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01005037 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02005038
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005039 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005040 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005041
Lars Ellenberg545752d2011-12-05 14:39:25 +01005042 /* We are about to start the cleanup after connection loss.
5043 * Make sure drbd_make_request knows about that.
5044 * Usually we should be in some network failure state already,
5045 * but just in case we are not, we fix it up here.
5046 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005047 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01005048
Philipp Reisner668700b2015-03-16 16:08:29 +01005049 /* ack_receiver does not clean up anything. It must not interfere, either */
Philipp Reisner1c03e522015-03-16 15:01:00 +01005050 drbd_thread_stop(&connection->ack_receiver);
Philipp Reisner668700b2015-03-16 16:08:29 +01005051 if (connection->ack_sender) {
5052 destroy_workqueue(connection->ack_sender);
5053 connection->ack_sender = NULL;
5054 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005055 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01005056
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005057 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005058 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5059 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005060 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005061 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005062 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005063 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005064 rcu_read_lock();
5065 }
5066 rcu_read_unlock();
5067
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005068 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005069 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01005070 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005071 atomic_set(&connection->current_epoch->epoch_size, 0);
5072 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01005073
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005074 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01005075
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005076 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
5077 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01005078
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005079 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005080 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005081 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005082 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01005083
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005084 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01005085
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02005086 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005087 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01005088}
5089
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005090static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01005091{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005092 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01005093 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005094
Philipp Reisner85719572010-07-21 10:20:17 +02005095 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005096 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005097 _drbd_wait_ee_list_empty(device, &device->active_ee);
5098 _drbd_wait_ee_list_empty(device, &device->sync_ee);
5099 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005100 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005101
5102 /* We do not have data structures that would allow us to
5103 * get the rs_pending_cnt down to 0 again.
5104 * * On C_SYNC_TARGET we do not have any data structures describing
5105 * the pending RSDataRequest's we have sent.
5106 * * On C_SYNC_SOURCE there is no data structure that tracks
5107 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
5108 * And no, it is not the sum of the reference counts in the
5109 * resync_LRU. The resync_LRU tracks the whole operation including
5110 * the disk-IO, while the rs_pending_cnt only tracks the blocks
5111 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005112 drbd_rs_cancel_all(device);
5113 device->rs_total = 0;
5114 device->rs_failed = 0;
5115 atomic_set(&device->rs_pending_cnt, 0);
5116 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005117
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005118 del_timer_sync(&device->resync_timer);
5119 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005120
Philipp Reisnerb411b362009-09-25 16:07:19 -07005121 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
5122 * w_make_resync_request etc. which may still be on the worker queue
5123 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02005124 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005125
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005126 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005127
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01005128 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
5129 might have issued work again. The one before drbd_finish_peer_reqs() is
5130 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02005131 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01005132
Lars Ellenberg08332d72012-08-17 15:09:13 +02005133 /* need to do it again, drbd_finish_peer_reqs() may have populated it
5134 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005135 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005136
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005137 kfree(device->p_uuid);
5138 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005139
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005140 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005141 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005143 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005144
Lars Ellenbergbe115b62016-06-14 00:26:11 +02005145 if (get_ldev(device)) {
5146 drbd_bitmap_io(device, &drbd_bm_write_copy_pages,
5147 "write from disconnected", BM_LOCKED_CHANGE_ALLOWED);
5148 put_ldev(device);
5149 }
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01005150
Philipp Reisnerb411b362009-09-25 16:07:19 -07005151 /* tcp_close and release of sendpage pages can be deferred. I don't
5152 * want to use SO_LINGER, because apparently it can be deferred for
5153 * more than 20 seconds (longest time I checked).
5154 *
5155 * Actually we don't care for exactly when the network stack does its
5156 * put_page(), but release our reference on these pages right here.
5157 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005158 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005159 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005160 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005161 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02005162 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005163 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005164 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005165 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005166 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005167
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005168 D_ASSERT(device, list_empty(&device->read_ee));
5169 D_ASSERT(device, list_empty(&device->active_ee));
5170 D_ASSERT(device, list_empty(&device->sync_ee));
5171 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005172
Philipp Reisner360cc742011-02-08 14:29:53 +01005173 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005174}
5175
5176/*
5177 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
5178 * we can agree on is stored in agreed_pro_version.
5179 *
5180 * feature flags and the reserved array should be enough room for future
5181 * enhancements of the handshake protocol, and possible plugins...
5182 *
5183 * for now, they are expected to be zero, but ignored.
5184 */
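/* Worked example of the negotiation below, with illustrative bounds: if our
 * [PRO_VERSION_MIN, PRO_VERSION_MAX] is [86, 101] and the peer advertises
 * 90..96, the ranges overlap and agreed_pro_version = min(101, 96) = 96.
 * A peer whose whole range lies outside ours ends at the incompat: label. */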
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005185static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005186{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005187 struct drbd_socket *sock;
5188 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005189
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005190 sock = &connection->data;
5191 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005192 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01005193 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005194 memset(p, 0, sizeof(*p));
5195 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
5196 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02005197 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005198 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005199}
5200
5201/*
5202 * return values:
5203 * 1 yes, we have a valid connection
5204 * 0 oops, did not work out, please try again
5205 * -1 peer talks different language,
5206 * no point in trying again, please go standalone.
5207 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005208static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005209{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005210 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005211 struct p_connection_features *p;
5212 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01005213 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005214 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005215
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005216 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01005217 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005218 return 0;
5219
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005220 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005221 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005222 return 0;
5223
Andreas Gruenbacher60381782011-03-28 17:05:50 +02005224 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005225 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005226 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005227 return -1;
5228 }
5229
Philipp Reisner77351055b2011-02-07 17:24:26 +01005230 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005231 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005232 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005233 return -1;
5234 }
5235
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005236 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005237 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005238 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005239 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005240
Philipp Reisnerb411b362009-09-25 16:07:19 -07005241 p->protocol_min = be32_to_cpu(p->protocol_min);
5242 p->protocol_max = be32_to_cpu(p->protocol_max);
5243 if (p->protocol_max == 0)
5244 p->protocol_max = p->protocol_min;
5245
5246 if (PRO_VERSION_MAX < p->protocol_min ||
5247 PRO_VERSION_MIN > p->protocol_max)
5248 goto incompat;
5249
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005250 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02005251 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005252
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005253 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005254 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005255
Lars Ellenberg9104d312016-06-14 00:26:31 +02005256 drbd_info(connection, "Feature flags enabled on protocol level: 0x%x%s%s%s.\n",
5257 connection->agreed_features,
5258 connection->agreed_features & DRBD_FF_TRIM ? " TRIM" : "",
5259 connection->agreed_features & DRBD_FF_THIN_RESYNC ? " THIN_RESYNC" : "",
5260 connection->agreed_features & DRBD_FF_WSAME ? " WRITE_SAME" :
5261 connection->agreed_features ? "" : " none");
Philipp Reisner92d94ae2016-06-14 00:26:15 +02005262
Philipp Reisnerb411b362009-09-25 16:07:19 -07005263 return 1;
5264
5265 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005266 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07005267 "I support %d-%d, peer supports %d-%d\n",
5268 PRO_VERSION_MIN, PRO_VERSION_MAX,
5269 p->protocol_min, p->protocol_max);
5270 return -1;
5271}
5272
5273#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005274static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005275{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005276 drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
5277 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005278 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005279}
5280#else
5281#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01005282
5283/* Return value:
5284 1 - auth succeeded,
5285 0 - failed, try again (network error),
5286 -1 - auth failed, don't try again.
5287*/
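/* Shape of the exchange implemented below (both peers run it symmetrically):
 * 1. send P_AUTH_CHALLENGE carrying CHALLENGE_LEN random bytes;
 * 2. receive the peer's challenge, rejecting one identical to our own;
 * 3. answer with P_AUTH_RESPONSE = HMAC(shared_secret, peer's challenge);
 * 4. compare the peer's response against HMAC(shared_secret, my_challenge);
 *    any mismatch fails authentication. */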
5288
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005289static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005290{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005291 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005292 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07005293 char *response = NULL;
5294 char *right_response = NULL;
5295 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005296 unsigned int key_len;
5297 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07005298 unsigned int resp_size;
Herbert Xu9534d672016-01-24 21:19:21 +08005299 SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
Philipp Reisner77351055b2011-02-07 17:24:26 +01005300 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005301 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005302 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005303
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005304 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
5305
Philipp Reisner44ed1672011-04-19 17:10:19 +02005306 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005307 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005308 key_len = strlen(nc->shared_secret);
5309 memcpy(secret, nc->shared_secret, key_len);
5310 rcu_read_unlock();
5311
Herbert Xu9534d672016-01-24 21:19:21 +08005312 desc->tfm = connection->cram_hmac_tfm;
5313 desc->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005314
Herbert Xu9534d672016-01-24 21:19:21 +08005315 rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005316 if (rv) {
Herbert Xu9534d672016-01-24 21:19:21 +08005317 drbd_err(connection, "crypto_shash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005318 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005319 goto fail;
5320 }
5321
5322 get_random_bytes(my_challenge, CHALLENGE_LEN);
5323
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005324 sock = &connection->data;
5325 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005326 rv = 0;
5327 goto fail;
5328 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005329 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005330 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005331 if (!rv)
5332 goto fail;
5333
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005334 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005335 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005336 rv = 0;
5337 goto fail;
5338 }
5339
Philipp Reisner77351055b2011-02-07 17:24:26 +01005340 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005341 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005342 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005343 rv = 0;
5344 goto fail;
5345 }
5346
Philipp Reisner77351055b2011-02-07 17:24:26 +01005347 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005348 drbd_err(connection, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005349 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005350 goto fail;
5351 }
5352
Philipp Reisner67cca282014-04-28 18:43:30 +02005353 if (pi.size < CHALLENGE_LEN) {
5354 drbd_err(connection, "AuthChallenge payload too small.\n");
5355 rv = -1;
5356 goto fail;
5357 }
5358
Philipp Reisner77351055b2011-02-07 17:24:26 +01005359 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005360 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005361 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005362 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005363 goto fail;
5364 }
5365
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005366 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005367 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005368 rv = 0;
5369 goto fail;
5370 }
5371
Philipp Reisner67cca282014-04-28 18:43:30 +02005372 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
5373 drbd_err(connection, "Peer presented the same challenge!\n");
5374 rv = -1;
5375 goto fail;
5376 }
5377
Herbert Xu9534d672016-01-24 21:19:21 +08005378 resp_size = crypto_shash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005379 response = kmalloc(resp_size, GFP_NOIO);
5380 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005381 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005382 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005383 goto fail;
5384 }
5385
Herbert Xu9534d672016-01-24 21:19:21 +08005386 rv = crypto_shash_digest(desc, peers_ch, pi.size, response);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005387 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005388 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005389 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005390 goto fail;
5391 }
5392
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005393 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005394 rv = 0;
5395 goto fail;
5396 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005397 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02005398 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005399 if (!rv)
5400 goto fail;
5401
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005402 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01005403 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005404 rv = 0;
5405 goto fail;
5406 }
5407
Philipp Reisner77351055b2011-02-07 17:24:26 +01005408 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005409 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005410 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005411 rv = 0;
5412 goto fail;
5413 }
5414
Philipp Reisner77351055b2011-02-07 17:24:26 +01005415 if (pi.size != resp_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005416 drbd_err(connection, "AuthResponse payload has unexpected size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005417 rv = 0;
5418 goto fail;
5419 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005420
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005421 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01005422 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005423 rv = 0;
5424 goto fail;
5425 }
5426
5427 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01005428 if (right_response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005429 drbd_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01005430 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005431 goto fail;
5432 }
5433
Herbert Xu9534d672016-01-24 21:19:21 +08005434 rv = crypto_shash_digest(desc, my_challenge, CHALLENGE_LEN,
5435 right_response);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005436 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005437 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005438 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005439 goto fail;
5440 }
5441
5442 rv = !memcmp(response, right_response, resp_size);
5443
5444 if (rv)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005445 drbd_info(connection, "Peer authenticated using %d bytes of HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02005446 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01005447 else
5448 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005449
5450 fail:
5451 kfree(peers_ch);
5452 kfree(response);
5453 kfree(right_response);
Herbert Xu9534d672016-01-24 21:19:21 +08005454 shash_desc_zero(desc);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005455
5456 return rv;
5457}
5458#endif
5459
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02005460int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005461{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005462 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005463 int h;
5464
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005465 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005466
5467 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005468 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005469 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005470 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01005471 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005472 }
5473 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005474 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005475 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005476 }
5477 } while (h == 0);
5478
Philipp Reisner91fd4da2011-04-20 17:47:29 +02005479 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005480 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005481
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005482 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005483
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005484 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005485 return 0;
5486}
5487
5488/* ********* acknowledge sender ******** */
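/* The got_*() handlers below run in the drbd_ack_receiver() thread and are
 * dispatched over the meta socket via ack_receiver_tbl[] further down. */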
5489
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005490static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005491{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005492 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005493 int retcode = be32_to_cpu(p->retcode);
5494
5495 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005496 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005497 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005498 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005499 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005500 drbd_set_st_err_str(retcode), retcode);
5501 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005502 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005503
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005504 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005505}
5506
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005507static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005508{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005509 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005510 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005511 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005512 int retcode = be32_to_cpu(p->retcode);
5513
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005514 peer_device = conn_peer_device(connection, pi->vnr);
5515 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005516 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005517 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005518
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005519 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005520 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005521 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01005522 }
5523
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005524 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005525 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005526 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005527 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005528 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005529 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005530 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005531 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005532
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005533 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005534}
5535
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005536static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005537{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005538 return drbd_send_ping_ack(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005539
5540}
5541
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005542static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005543{
5544 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005545 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5546 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5547 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005548
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005549 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005550}
5551
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005552static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005553{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005554 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005555 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005556 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005557 sector_t sector = be64_to_cpu(p->sector);
5558 int blksize = be32_to_cpu(p->blksize);
5559
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005560 peer_device = conn_peer_device(connection, pi->vnr);
5561 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005562 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005563 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005564
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005565 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005566
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005567 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005568
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005569 if (get_ldev(device)) {
5570 drbd_rs_complete_io(device, sector);
5571 drbd_set_in_sync(device, sector, blksize);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005572 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005573 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5574 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005575 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005576 dec_rs_pending(device);
5577 atomic_add(blksize >> 9, &device->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005578
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005579 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005580}
5581
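/* Look the request up (by block_id and sector) in the given rb-tree root,
 * apply 'what' through __req_mod(), and complete the master bio if that
 * state transition finished it; missing_ok indicates a vanished request is
 * expected (protocol A writes, see got_NegAck) rather than a logged error. */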
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005582static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005583validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005584 struct rb_root *root, const char *func,
5585 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005586{
5587 struct drbd_request *req;
5588 struct bio_and_error m;
5589
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005590 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005591 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005592 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005593 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005594 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005595 }
5596 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005597 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005598
5599 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005600 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005601 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005602}
5603
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005604static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005605{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005606 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005607 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005608 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005609 sector_t sector = be64_to_cpu(p->sector);
5610 int blksize = be32_to_cpu(p->blksize);
5611 enum drbd_req_event what;
5612
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005613 peer_device = conn_peer_device(connection, pi->vnr);
5614 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005615 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005616 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005617
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005618 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005619
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005620 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005621 drbd_set_in_sync(device, sector, blksize);
5622 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005623 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005624 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005625 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005626 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005627 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005628 break;
5629 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005630 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005631 break;
5632 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005633 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005634 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005635 case P_SUPERSEDED:
5636 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005637 break;
5638 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005639 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005640 break;
5641 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005642 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005643 }
5644
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005645 return validate_req_change_req_state(device, p->block_id, sector,
5646 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005647 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005648}
5649
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005650static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005651{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005652 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005653 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005654 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005655 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005656 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005657 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005658
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005659 peer_device = conn_peer_device(connection, pi->vnr);
5660 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005661 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005662 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005663
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005664 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005665
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005666 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005667 dec_rs_pending(device);
5668 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005669 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005670 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005671
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005672 err = validate_req_change_req_state(device, p->block_id, sector,
5673 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005674 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005675 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005676 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5677 The master bio might already be completed, therefore the
5678 request is no longer in the collision hash. */
5679 /* In Protocol B we might already have got a P_RECV_ACK
5680 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005681 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005682 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005683 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005684}
5685
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005686static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005687{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005688 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005689 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005690 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005691 sector_t sector = be64_to_cpu(p->sector);
5692
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005693 peer_device = conn_peer_device(connection, pi->vnr);
5694 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005695 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005696 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005697
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005698 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005699
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005700 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005701 (unsigned long long)sector, be32_to_cpu(p->blksize));
5702
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005703 return validate_req_change_req_state(device, p->block_id, sector,
5704 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005705 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005706}
5707
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005708static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005709{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005710 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005711 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005712 sector_t sector;
5713 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005714 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005715
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005716 peer_device = conn_peer_device(connection, pi->vnr);
5717 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005718 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005719 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005720
5721 sector = be64_to_cpu(p->sector);
5722 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005723
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005724 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005725
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005726 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005727
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005728 if (get_ldev_if_state(device, D_FAILED)) {
5729 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005730 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005731 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005732 drbd_rs_failed_io(device, sector, size);
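 /* fall through */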
Philipp Reisnerd612d302010-12-27 10:53:28 +01005733 case P_RS_CANCEL:
5734 break;
5735 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005736 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005737 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005738 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005739 }
5740
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005741 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005742}
5743
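/* A barrier ack releases the matching transfer-log epoch via tl_release();
 * additionally, any volume sitting in Ahead mode with no application I/O
 * left in flight gets its start_resync_timer armed, transitioning it
 * towards SyncSource roughly one second (HZ) later. */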
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005744static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005745{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005746 struct p_barrier_ack *p = pi->data;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005747 struct drbd_peer_device *peer_device;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005748 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005749
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005750 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005751
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005752 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005753 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5754 struct drbd_device *device = peer_device->device;
5755
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005756 if (device->state.conn == C_AHEAD &&
5757 atomic_read(&device->ap_in_flight) == 0 &&
5758 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5759 device->start_resync_timer.expires = jiffies + HZ;
5760 add_timer(&device->start_resync_timer);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005761 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005762 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005763 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005764
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005765 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005766}
5767
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005768static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005769{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005770 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005771 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005772 struct p_block_ack *p = pi->data;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005773 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005774 sector_t sector;
5775 int size;
5776
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005777 peer_device = conn_peer_device(connection, pi->vnr);
5778 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005779 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005780 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005781
Philipp Reisnerb411b362009-09-25 16:07:19 -07005782 sector = be64_to_cpu(p->sector);
5783 size = be32_to_cpu(p->blksize);
5784
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005785 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005786
5787 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005788 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005789 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005790 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005791
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005792 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005793 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005794
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005795 drbd_rs_complete_io(device, sector);
5796 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005797
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005798 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005799
5800 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005801 if ((device->ov_left & 0x200) == 0x200)
5802 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005803
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005804 if (device->ov_left == 0) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005805 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5806 if (dw) {
5807 dw->w.cb = w_ov_finished;
5808 dw->device = device;
5809 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005810 } else {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005811 drbd_err(device, "kmalloc(dw) failed.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005812 ov_out_of_sync_print(device);
5813 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005814 }
5815 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005816 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005817 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005818}
5819
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005820static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005821{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005822 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005823}
5824
Philipp Reisner668700b2015-03-16 16:08:29 +01005825struct meta_sock_cmd {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005826 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005827 int (*fn)(struct drbd_connection *connection, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005828};
5829
Philipp Reisner668700b2015-03-16 16:08:29 +01005830static void set_rcvtimeo(struct drbd_connection *connection, bool ping_timeout)
5831{
5832 long t;
5833 struct net_conf *nc;
5834
5835 rcu_read_lock();
5836 nc = rcu_dereference(connection->net_conf);
5837 t = ping_timeout ? nc->ping_timeo : nc->ping_int;
5838 rcu_read_unlock();
5839
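 /* ping_timeo is configured in tenths of a second, ping_int in whole
 * seconds; scale to jiffies first, then adjust the timeout case */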
5840 t *= HZ;
5841 if (ping_timeout)
5842 t /= 10;
5843
5844 connection->meta.socket->sk->sk_rcvtimeo = t;
5845}
5846
5847static void set_ping_timeout(struct drbd_connection *connection)
5848{
5849 set_rcvtimeo(connection, 1);
5850}
5851
5852static void set_idle_timeout(struct drbd_connection *connection)
5853{
5854 set_rcvtimeo(connection, 0);
5855}
5856
5857static struct meta_sock_cmd ack_receiver_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005858 [P_PING] = { 0, got_Ping },
5859 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005860 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5861 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5862 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005863 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005864 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5865 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005866 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005867 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5868 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5869 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5870 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005871 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005872 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5873 [P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
5874 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005875};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005876
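/*
 * Runs as an RT (SCHED_RR) thread, presumably so acks are not starved
 * behind bulk work under load.  The receive loop is a two-phase parser:
 * first collect a full header (expect == header_size), decode it, then
 * widen 'expect' by the command's payload size and dispatch the handler
 * only once the complete packet has arrived.
 */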
Philipp Reisner1c03e522015-03-16 15:01:00 +01005877int drbd_ack_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005878{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005879 struct drbd_connection *connection = thi->connection;
Philipp Reisner668700b2015-03-16 16:08:29 +01005880 struct meta_sock_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005881 struct packet_info pi;
Philipp Reisner668700b2015-03-16 16:08:29 +01005882 unsigned long pre_recv_jif;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005883 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005884 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005885 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005886 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005887 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005888 bool ping_timeout_active = false;
Philipp Reisner3990e042013-03-27 14:08:48 +01005889 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005890
Philipp Reisner3990e042013-03-27 14:08:48 +01005891 rv = sched_setscheduler(current, SCHED_RR, &param);
5892 if (rv < 0)
Philipp Reisner668700b2015-03-16 16:08:29 +01005893 drbd_err(connection, "drbd_ack_receiver: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005894
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005895 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005896 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005897
Philipp Reisner668700b2015-03-16 16:08:29 +01005898 conn_reclaim_net_peer_reqs(connection);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005899
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005900 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5901 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005902 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005903 goto reconnect;
5904 }
Philipp Reisner668700b2015-03-16 16:08:29 +01005905 set_ping_timeout(connection);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005906 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005907 }
5908
Philipp Reisner668700b2015-03-16 16:08:29 +01005909 pre_recv_jif = jiffies;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005910 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005911
5912 /* Note:
5913 * -EINTR (on meta) we got a signal
5914 * -EAGAIN (on meta) rcvtimeo expired
5915 * -ECONNRESET other side closed the connection
5916 * -ERESTARTSYS (on data) we got a signal
5917 * rv < 0 other than above: unexpected error!
5918 * rv == expected: full header or command
5919 * rv < expected: "woken" by signal during receive
5920 * rv == 0 : "connection shut down by peer"
5921 */
5922 if (likely(rv > 0)) {
5923 received += rv;
5924 buf += rv;
5925 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005926 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005927 long t;
5928 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005929 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005930 rcu_read_unlock();
5931
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005932 t = wait_event_timeout(connection->ping_wait,
5933 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005934 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005935 if (t)
5936 break;
5937 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005938 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005939 goto reconnect;
5940 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005941 /* If the data socket received something meanwhile,
5942 * that is good enough: peer is still alive. */
Philipp Reisner668700b2015-03-16 16:08:29 +01005943 if (time_after(connection->last_received, pre_recv_jif))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005944 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005945 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005946 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005947 goto reconnect;
5948 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005949 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005950 continue;
5951 } else if (rv == -EINTR) {
Philipp Reisner668700b2015-03-16 16:08:29 +01005952 /* maybe drbd_thread_stop(): the while condition will notice.
5953 * maybe woken for send_ping: we'll send a ping above,
5954 * and change the rcvtimeo */
5955 flush_signals(current);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005956 continue;
5957 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005958 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005959 goto reconnect;
5960 }
5961
5962 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005963 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005964 goto reconnect;
Philipp Reisner668700b2015-03-16 16:08:29 +01005965 cmd = &ack_receiver_tbl[pi.cmd];
5966 if (pi.cmd >= ARRAY_SIZE(ack_receiver_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005967 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005968 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005969 goto disconnect;
5970 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005971 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005972 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005973 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005974 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005975 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005976 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005977 }
5978 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005979 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005980
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005981 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005982 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005983 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005984 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005985 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005986
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005987 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005988
Philipp Reisner668700b2015-03-16 16:08:29 +01005989 if (cmd == &ack_receiver_tbl[P_PING_ACK]) {
5990 set_idle_timeout(connection);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005991 ping_timeout_active = false;
5992 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005993
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005994 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005995 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005996 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005997 cmd = NULL;
5998 }
5999 }
6000
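/* The two blocks below are only reachable via the goto labels inside
 * them: normal thread termination falls past both, error paths jump
 * into one of them. */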
6001 if (0) {
6002reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02006003 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6004 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07006005 }
6006 if (0) {
6007disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02006008 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07006009 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07006010
Philipp Reisner668700b2015-03-16 16:08:29 +01006011 drbd_info(connection, "ack_receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07006012
6013 return 0;
6014}
Philipp Reisner668700b2015-03-16 16:08:29 +01006015
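/*
 * Work item counterpart to the ack receiver: acks for completed peer
 * requests are sent from here, which also allows them to be batched
 * under one cork/uncork pair (below).
 */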
6016void drbd_send_acks_wf(struct work_struct *ws)
6017{
6018 struct drbd_peer_device *peer_device =
6019 container_of(ws, struct drbd_peer_device, send_acks_work);
6020 struct drbd_connection *connection = peer_device->connection;
6021 struct drbd_device *device = peer_device->device;
6022 struct net_conf *nc;
6023 int tcp_cork, err;
6024
6025 rcu_read_lock();
6026 nc = rcu_dereference(connection->net_conf);
6027 tcp_cork = nc->tcp_cork;
6028 rcu_read_unlock();
6029
6030 if (tcp_cork)
6031 drbd_tcp_cork(connection->meta.socket);
6032
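	/* with the socket corked, all acks generated by drbd_finish_peer_reqs()
	 * below can be coalesced into fewer TCP segments; the uncork further
	 * down flushes whatever is still queued */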
6033 err = drbd_finish_peer_reqs(device);
6034 kref_put(&device->kref, drbd_destroy_device);
6035 /* the matching kref_get() is in drbd_endio_write_sec_final(); it keeps the
6036 struct work_struct send_acks_work alive, which is embedded in the peer_device object */
6037
6038 if (err) {
6039 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
6040 return;
6041 }
6042
6043 if (tcp_cork)
6044 drbd_tcp_uncork(connection->meta.socket);
6047}