/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with singly linked page lists,
 * page->private being our "next" pointer.
 */

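/* The chain primitives themselves live in drbd_int.h; roughly (reference
 * sketch, not a redefinition):
 *
 *	static inline struct page *page_chain_next(struct page *page)
 *	{
 *		return (struct page *)page_private(page);
 *	}
 *	#define page_chain_for_each_safe(page, n) \
 *		for (; page && ({ n = page_chain_next(page); 1; }); page = n)
 *
 * That is, each page's "next" pointer lives in page->private, and a private
 * value of 0 terminates the chain.
 */
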
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
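
/* Worked example: with *head pointing at a chain A -> B -> C -> D and n == 2,
 * page_chain_del() returns A -> B (B's private pointer set to 0 as the new
 * end-of-list marker) and leaves *head pointing at C -> D.  With n == 5 it
 * returns NULL and the chain is left untouched.
 */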

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

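/* Example: if *head currently points at X -> Y and we add the chain A -> B
 * (chain_first == A, chain_last == B), the result is A -> B -> X -> Y;
 * chains are always spliced in at the front of the pool.
 */
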
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list.  Since
	   they are sent in order over the wire, they have to finish
	   in order.  As soon as we see the first one that has not
	   finished, we can stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @device:	DRBD device.
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
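
/* Typical use, as in drbd_alloc_peer_req() below: allocate the chain backing
 * a peer request, and give it back when the request is destroyed
 * (illustrative sketch only):
 *
 *	page = drbd_alloc_pages(peer_device, nr_pages, gfp_mask & __GFP_WAIT);
 *	...
 *	drbd_free_pages(device, page, 0);
 */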

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
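
/* Note: whether the pages get accounted in pp_in_use or pp_in_use_by_net is
 * derived purely from which list was passed in: &device->net_ee holds peer
 * requests whose pages may still be referenced by the network stack
 * (see drbd_peer_req_has_active_page()).
 */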

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}
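
/* Return semantics follow kernel_recvmsg(): the number of bytes received
 * (with the default MSG_WAITALL, that is size unless interrupted), 0 on
 * orderly shutdown by the peer, or a negative errno.
 */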

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
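
/* Called with the values from the matching net_conf before bind()/connect(),
 * e.g. in drbd_try_connect():
 *
 *	drbd_setbufsize(sock, nc->sndbuf_size, nc->rcvbuf_size);
 *
 * A value of 0 leaves the kernel default (including autotuning) in place.
 */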

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so Linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};
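
/* Flow of the accept path: prepare_listen_socket() installs
 * drbd_incoming_connection() as the listen socket's sk_state_change callback;
 * once a peer connects, that callback completes ->door_bell, and
 * drbd_wait_for_connect(), sleeping on that completion, picks the new
 * connection up via kernel_accept().
 */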

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
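	/* i.e. +/- timeo/7 (about 14.3%), for a total spread of 2/7 ~ 28.5% */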

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock: pointer to the pointer to the socket.
 */
static bool drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
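
/* The probe above uses MSG_DONTWAIT | MSG_PEEK, so it neither blocks nor
 * consumes data: rr > 0 means data is pending, -EAGAIN means the connection
 * is alive but currently idle; anything else means the socket is dead, and
 * it is released and cleared for the caller.
 */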

static bool connection_established(struct drbd_connection *connection,
				   struct socket **sock1,
				   struct socket **sock2)
{
	struct net_conf *nc;
	int timeout;
	bool ok;

	if (!*sock1 || !*sock2)
		return false;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
	rcu_read_unlock();
	schedule_timeout_interruptible(timeout);

	ok = drbd_socket_okay(sock1);
	ok = drbd_socket_okay(sock2) && ok;

	return ok;
}

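/* DRBD uses two TCP connections per peer: one for data/bulk traffic ("sock")
 * and one for meta data/acks ("msock"); see conn_connect() below.  An attempt
 * only counts as established once both sockets exist and still look healthy
 * after the settle timeout above.
 */
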
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better. */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
1116 if (msock.socket)
1117 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001118 return -1;
1119}
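
/* Summary of the connect sequence completed above (illustrative):
 * feature exchange (drbd_do_features), then optional challenge-response
 * authentication (drbd_do_auth, only if a cram_hmac_tfm is configured),
 * then P_PROTOCOL (drbd_send_protocol), then per-volume drbd_connected(),
 * and finally the request for C_WF_REPORT_PARAMS plus starting the
 * asender thread. */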

static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}
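
/* Illustration (derived from the checks above) of the three on-the-wire
 * header variants, keyed by their leading magic value; see
 * drbd_protocol.h for the authoritative struct layouts:
 *
 *   DRBD_MAGIC_100 (be32) -> struct p_header100: pad (must be zero),
 *                            be16 volume, be16 command, be32 length
 *   DRBD_MAGIC_BIG (be16) -> struct p_header95:  be16 command,
 *                            be32 length, vnr implied 0
 *   DRBD_MAGIC     (be32) -> struct p_header80:  be16 command,
 *                            be16 length, vnr implied 0
 *
 * In every case pi->data ends up pointing just past the header. */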

static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
						GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection: DRBD connection.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
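
/* Illustrative summary of the rules applied above: an epoch is finished
 * only once it has seen at least one write (epoch_size != 0), no write
 * is still in flight (active == 0), and either its P_BARRIER number has
 * arrived (DE_HAVE_BARRIER_NUMBER) or we are cleaning up (EV_CLEANUP).
 * Finishing the current epoch recycles it in place (FE_RECYCLED);
 * finishing an older epoch on the list frees it (FE_DESTROYED). */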

static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	if (wo == WO_bdev_flush && !dc->disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !dc->disk_drain)
		wo = WO_none;

	return wo;
}
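
/* Example (sketch): with disk_flushes disabled in the disk_conf, a
 * requested WO_bdev_flush degrades to WO_drain_io; if disk_drain is
 * disabled as well, it degrades further to WO_none.
 * drbd_bump_write_ordering() below applies this per attached backing
 * device, so the resource ends up with a method no attached disk
 * forbids. */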

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @resource: DRBD resource.
 * @bdev: backing device whose limits to honor, or NULL.
 * @wo: Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}

/**
 * drbd_submit_peer_request()
 * @device: DRBD device.
 * @peer_req: peer request
 * @rw: flag field, see bio->bi_rw
 * @fault_type: fault injection type, passed through to bio submission
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (ds + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
					 sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					 "bio_add_page failed for len=%u, "
					 "bi_vcnt=0 (bi_sector=%llu)\n",
					 len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static struct drbd_peer_device *
conn_peer_device(struct drbd_connection *connection, int volume_number)
{
	return idr_find(&connection->peer_devices, volume_number);
}

static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in resource->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
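
/* Note on the cases above (illustrative): for WO_bdev_flush and
 * WO_drain_io the receiver blocks until all active writes of the
 * closing epoch have completed locally (and, for flush, have been
 * forced to stable storage via drbd_flush()) before a new epoch is
 * allowed to start; for WO_none the barrier is pure bookkeeping. */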

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	if (trim)
		return peer_req;

	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
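
/* Payload layout consumed by read_in_block() above (sketch):
 *
 *   [ dgs-byte integrity digest -- only if peer_integrity_tfm is set ]
 *   [ data_size bytes of block data                                  ]
 *
 * For P_TRIM no data payload follows: the affected size travels in
 * struct p_trim instead, and no pages are attached to the request. */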

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (dgs) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}

static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block(),
	 * respectively in _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}

static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}

static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}

static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}

static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}

/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}

static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}

static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}

static int e_send_retry_write(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_connection *connection = peer_req->peer_device->connection;

	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
			     P_RETRY_WRITE : P_SUPERSEDED);
}

static bool seq_greater(u32 a, u32 b)
{
	/*
	 * We assume 32-bit wrap-around here.
	 * For 24-bit wrap-around, we would have to shift:
	 * a <<= 8; b <<= 8;
	 */
	return (s32)a - (s32)b > 0;
}
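
/* Worked example for the wrap-around math: seq_greater(1, 0xffffffff)
 * is true, since (s32)1 - (s32)0xffffffff == 1 - (-1) == 2 > 0, so
 * sequence number 1 is correctly treated as "after" 0xffffffff. */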
1988
1989static u32 seq_max(u32 a, u32 b)
1990{
1991 return seq_greater(a, b) ? a : b;
1992}
1993
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001994static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001995{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001996 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001997 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001998
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001999 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002000 spin_lock(&device->peer_seq_lock);
2001 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2002 device->peer_seq = newest_peer_seq;
2003 spin_unlock(&device->peer_seq_lock);
2004 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002005 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002006 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002007 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002008}
2009
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002010static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2011{
2012 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2013}
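
/* Illustrative example with made-up values: for two 4096-byte requests
 * (4096 >> 9 == 8 sectors each), overlaps(0, 4096, 8, 4096) is false,
 * since the sector ranges [0, 8) and [8, 16) merely touch, while
 * overlaps(0, 4096, 4, 4096) is true, since [0, 8) and [4, 12) share
 * sectors 4..7. */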
2014
2015/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002016static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002017{
2018 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002019	bool rv = false;
2020
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002021 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002022 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002023 if (overlaps(peer_req->i.sector, peer_req->i.size,
2024 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002025			rv = true;
2026 break;
2027 }
2028 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002029 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002030
2031 return rv;
2032}
2033
Philipp Reisnerb411b362009-09-25 16:07:19 -07002034/* Called from receive_Data.
2035 * Synchronize packets on sock with packets on msock.
2036 *
 2037 * This is here so that even when a P_DATA packet traveling via sock has
 2038 * overtaken an Ack packet traveling on msock, they are still processed in
 2039 * the order in which they were sent.
2040 *
2041 * Note: we don't care for Ack packets overtaking P_DATA packets.
2042 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002043 * In case packet_seq is larger than device->peer_seq, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002044 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002045 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002046 * ourselves. Correctly handles 32bit wrap around.
2047 *
 2048 * Assume we have a 10 GBit connection, that is about 1<<30 bytes per second,
 2049 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
 2050 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
 2051 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2052 *
2053 * returns 0 if we may process the packet,
2054 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002055static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002056{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002057 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002058 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002059 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002060 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002061
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002062 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002063 return 0;
2064
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002065 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002067 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2068 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002069 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002070 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002071
Philipp Reisnerb411b362009-09-25 16:07:19 -07002072 if (signal_pending(current)) {
2073 ret = -ERESTARTSYS;
2074 break;
2075 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002076
2077 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002078 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002079 rcu_read_unlock();
2080
2081 if (!tp)
2082 break;
2083
2084 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002085 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2086 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002087 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002088 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002089 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002090 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002091 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002092 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002093 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002094 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002095 break;
2096 }
2097 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002098 spin_unlock(&device->peer_seq_lock);
2099 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002100 return ret;
2101}
2102
Lars Ellenberg688593c2010-11-17 22:25:03 +01002103/* see also bio_flags_to_wire()
2104 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2105 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002106static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002107{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002108 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2109 (dpf & DP_FUA ? REQ_FUA : 0) |
2110 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2111 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002112}
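
/* For example (hypothetical flag combination): a peer write carrying
 * DP_FUA | DP_FLUSH is mapped to REQ_FUA | REQ_FLUSH for the local
 * submission, so the durability semantics requested by the peer survive
 * the round trip through the wire encoding. */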
2113
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002114static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002115 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002116{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002117 struct drbd_interval *i;
2118
2119 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002120 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002121 struct drbd_request *req;
2122 struct bio_and_error m;
2123
2124 if (!i->local)
2125 continue;
2126 req = container_of(i, struct drbd_request, i);
2127 if (!(req->rq_state & RQ_POSTPONED))
2128 continue;
2129 req->rq_state &= ~RQ_POSTPONED;
2130 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002131 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002132 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002133 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002134 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002135 goto repeat;
2136 }
2137}
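
/* Note on the goto-repeat pattern above: completing the master bio
 * requires dropping req_lock, which invalidates the interval-tree walk,
 * so the scan restarts from the top for every request handled. */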
2138
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002139static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002140 struct drbd_peer_request *peer_req)
2141{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002142 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002143 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002144 sector_t sector = peer_req->i.sector;
2145 const unsigned int size = peer_req->i.size;
2146 struct drbd_interval *i;
2147 bool equal;
2148 int err;
2149
2150 /*
2151 * Inserting the peer request into the write_requests tree will prevent
2152 * new conflicting local requests from being added.
2153 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002154 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002155
2156 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002157 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002158 if (i == &peer_req->i)
2159 continue;
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002160 if (i->completed)
2161 continue;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002162
2163 if (!i->local) {
2164 /*
2165 * Our peer has sent a conflicting remote request; this
2166 * should not happen in a two-node setup. Wait for the
2167 * earlier peer request to complete.
2168 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002169 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002170 if (err)
2171 goto out;
2172 goto repeat;
2173 }
2174
2175 equal = i->sector == sector && i->size == size;
2176 if (resolve_conflicts) {
2177 /*
2178 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002179 * overlapping request, it can be considered overwritten
2180 * and thus superseded; otherwise, it will be retried
2181 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002182 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002183 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002184 (i->size >> 9) >= sector + (size >> 9);
2185
2186 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002187 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002188 "local=%llus +%u, remote=%llus +%u, "
2189 "assuming %s came first\n",
2190 (unsigned long long)i->sector, i->size,
2191 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002192 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002193
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002194 inc_unacked(device);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002195 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002196 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002197 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002198 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002199
2200 err = -ENOENT;
2201 goto out;
2202 } else {
2203 struct drbd_request *req =
2204 container_of(i, struct drbd_request, i);
2205
2206 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002207 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002208 "local=%llus +%u, remote=%llus +%u\n",
2209 (unsigned long long)i->sector, i->size,
2210 (unsigned long long)sector, size);
2211
2212 if (req->rq_state & RQ_LOCAL_PENDING ||
2213 !(req->rq_state & RQ_POSTPONED)) {
2214 /*
2215 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002216 * decide if this request has been superseded
2217 * or needs to be retried.
2218 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002219 * disappear from the write_requests tree.
2220 *
2221 * In addition, wait for the conflicting
2222 * request to finish locally before submitting
2223 * the conflicting peer request.
2224 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002225 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002226 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002227 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002228 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002229 goto out;
2230 }
2231 goto repeat;
2232 }
2233 /*
2234 * Remember to restart the conflicting requests after
2235 * the new peer request has completed.
2236 */
2237 peer_req->flags |= EE_RESTART_REQUESTS;
2238 }
2239 }
2240 err = 0;
2241
2242 out:
2243 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002244 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002245 return err;
2246}
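
/* Worked example for the "superseded" test above, with made-up numbers:
 * a local request at i->sector == 0 with i->size == 8192 covers sectors
 * 0..15; a peer request at sector == 4 with size == 4096 covers sectors
 * 4..11. Then 0 <= 4 and 0 + 16 >= 4 + 8, so the peer request is fully
 * contained: "superseded" is true and the write is acked with
 * P_SUPERSEDED instead of being submitted. */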
2247
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002249static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002251 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002252 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002253 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002254 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002255 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002256 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002257 int rw = WRITE;
2258 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002259 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002260
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002261 peer_device = conn_peer_device(connection, pi->vnr);
2262 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002263 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002264 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002266 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002267 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002268
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002269 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2270 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002271 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002272 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002273 if (!err)
2274 err = err2;
2275 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002276 }
2277
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002278 /*
2279 * Corresponding put_ldev done either below (on various errors), or in
2280 * drbd_peer_request_endio, if we successfully submit the data at the
2281 * end of this function.
2282 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002283
2284 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002285 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002286 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002287 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002288 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002289 }
2290
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002291 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002292
Lars Ellenberg688593c2010-11-17 22:25:03 +01002293 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002294 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002295 if (pi->cmd == P_TRIM) {
2296 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2297 peer_req->flags |= EE_IS_TRIM;
2298 if (!blk_queue_discard(q))
2299 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2300 D_ASSERT(peer_device, peer_req->i.size > 0);
2301 D_ASSERT(peer_device, rw & REQ_DISCARD);
2302 D_ASSERT(peer_device, peer_req->pages == NULL);
2303 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002304 D_ASSERT(device, peer_req->i.size == 0);
2305 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002306 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002307
2308 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002309 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002310
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002311 spin_lock(&connection->epoch_lock);
2312 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002313 atomic_inc(&peer_req->epoch->epoch_size);
2314 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002315 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002316
Philipp Reisner302bdea2011-04-21 11:36:49 +02002317 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002318 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002319 rcu_read_unlock();
2320 if (tp) {
2321 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002322 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002323 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002324 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002325 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002326 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002327 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002328 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002329 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002330 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002331 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002332 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002333 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002334 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002335 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002336 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002337 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002338 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002339 /* if we use the zeroout fallback code, we process synchronously
 2340 * and we wait for all pending requests, i.e. we wait for
2341 * active_ee to become empty in drbd_submit_peer_request();
2342 * better not add ourselves here. */
2343 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02002344 list_add_tail(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002345 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002346
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002347 if (device->state.conn == C_SYNC_TARGET)
2348 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002349
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002350 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002351 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002352 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002353 case DRBD_PROT_C:
2354 dp_flags |= DP_SEND_WRITE_ACK;
2355 break;
2356 case DRBD_PROT_B:
2357 dp_flags |= DP_SEND_RECEIVE_ACK;
2358 break;
2359 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002360 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002361 }
2362
2363 if (dp_flags & DP_SEND_WRITE_ACK) {
2364 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002365 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002366 /* corresponding dec_unacked() in e_end_block()
 2367 * or in _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002368 }
2369
2370 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002371 /* I really don't like it that the receiver thread
2372 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002373 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002374 }
2375
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002376 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002377		/* In case we have the only disk of the cluster, mark the range out of sync and cover it with the activity log. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002378 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002379 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2380 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002381 drbd_al_begin_io(device, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002382 }
2383
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002384 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002385 if (!err)
2386 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002387
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002388 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002389 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002390 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002391 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002392 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002393 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002394 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002395 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002396
Philipp Reisnerb411b362009-09-25 16:07:19 -07002397out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002398 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002399 put_ldev(device);
2400 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002401 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002402}
2403
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002404/* We may throttle resync, if the lower device seems to be busy,
2405 * and current sync rate is above c_min_rate.
2406 *
2407 * To decide whether or not the lower device is busy, we use a scheme similar
 2408 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 2409 * activity (more than 64 sectors) that we cannot account for with our own
 2410 * resync activity, the lower device obviously is "busy".
 2411 *
 2412 * The current sync rate computed here uses only the most recent two step
 2413 * marks, giving a short-time average so we can react faster.
2414 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002415bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2416{
2417 struct lc_element *tmp;
2418 bool throttle = true;
2419
2420 if (!drbd_rs_c_min_rate_throttle(device))
2421 return false;
2422
2423 spin_lock_irq(&device->al_lock);
2424 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2425 if (tmp) {
2426 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2427 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2428 throttle = false;
2429 /* Do not slow down if app IO is already waiting for this extent */
2430 }
2431 spin_unlock_irq(&device->al_lock);
2432
2433 return throttle;
2434}
2435
2436bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002437{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002438 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002439 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002440 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002441 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002442
2443 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002444 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002445 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002446
2447 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002448 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002449 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002450
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002451 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2452 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002453 atomic_read(&device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002454 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002455 unsigned long rs_left;
2456 int i;
2457
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002458 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002459
2460 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2461 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002462 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002463
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002464 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2465 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002466 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002467 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002468
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002469 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002470 if (!dt)
2471 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002472 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002473 dbdt = Bit2KB(db/dt);
2474
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002475 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002476 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002477 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002478 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002479}
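
/* Rough worked example with made-up numbers, assuming the usual 4 KiB of
 * resync data per bitmap bit behind Bit2KB(): dt == 2 seconds and
 * db == 32768 bits give dbdt == Bit2KB(32768 / 2) == 65536 KiB/s; with
 * c_min_rate configured at, say, 4096 KiB/s, dbdt > c_min_rate and the
 * function returns true (the caller may still override this for
 * BME_PRIORITY extents in drbd_rs_should_slow_down()). */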
2480
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002481static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002482{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002483 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002484 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002485 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002486 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002487 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002488 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002489 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002490 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002491 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002492
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002493 peer_device = conn_peer_device(connection, pi->vnr);
2494 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002495 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002496 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002497 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002498
2499 sector = be64_to_cpu(p->sector);
2500 size = be32_to_cpu(p->blksize);
2501
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002502 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002503 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002504 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002505 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002506 }
2507 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002508 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002509 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002510 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002511 }
2512
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002513 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002514 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002515 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002516 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002517 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002518 break;
2519 case P_RS_DATA_REQUEST:
2520 case P_CSUM_RS_REQUEST:
2521 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002522			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002523 break;
2524 case P_OV_REPLY:
2525 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002526 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002527 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002528 break;
2529 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002530 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002531 }
2532 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002533		drbd_err(device, "Cannot satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002534 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002535
Lars Ellenberga821cc42010-09-06 12:31:37 +02002536 /* drain possibly payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002537 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002538 }
2539
2540 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2541 * "criss-cross" setup, that might cause write-out on some other DRBD,
2542 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002543 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2544 true /* has real payload */, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002545 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002546 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002547 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002548 }
2549
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002550 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002551 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002552 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002553 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002554 /* application IO, don't drbd_rs_begin_io */
2555 goto submit;
2556
Philipp Reisnerb411b362009-09-25 16:07:19 -07002557 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002558 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002559 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002560 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002561 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002562 break;
2563
2564 case P_OV_REPLY:
2565 case P_CSUM_RS_REQUEST:
2566 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002567 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002568 if (!di)
2569 goto out_free_e;
2570
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002571 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002572 di->digest = (((char *)di)+sizeof(struct digest_info));
2573
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002574 peer_req->digest = di;
2575 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002576
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002577 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002578 goto out_free_e;
2579
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002580 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002581 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002582 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002583 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002584 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01002585 /* remember to report stats in drbd_resync_finished */
2586 device->use_csums = true;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002587 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002588 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002589 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002590 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002591 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002592 /* drbd_rs_begin_io done when we sent this request,
2593 * but accounting still needs to be done. */
2594 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002595 }
2596 break;
2597
2598 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002599 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002600 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002601 unsigned long now = jiffies;
2602 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002603 device->ov_start_sector = sector;
2604 device->ov_position = sector;
2605 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2606 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002607 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002608 device->rs_mark_left[i] = device->ov_left;
2609 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002610 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002611 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002612 (unsigned long long)sector);
2613 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002614 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002615 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002616 break;
2617
Philipp Reisnerb411b362009-09-25 16:07:19 -07002618 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002619 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002620 }
2621
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002622 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2623 * wrt the receiver, but it is not as straightforward as it may seem.
2624 * Various places in the resync start and stop logic assume resync
2625 * requests are processed in order, requeuing this on the worker thread
2626 * introduces a bunch of new code for synchronization between threads.
2627 *
2628 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2629 * "forever", throttling after drbd_rs_begin_io will lock that extent
2630 * for application writes for the same time. For now, just throttle
2631 * here, where the rest of the code expects the receiver to sleep for
2632 * a while, anyways.
2633 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002634
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002635 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2636 * this defers syncer requests for some time, before letting at least
 2637 * one request through. The resync controller on the receiving side
2638 * will adapt to the incoming rate accordingly.
2639 *
2640 * We cannot throttle here if remote is Primary/SyncTarget:
2641 * we would also throttle its application reads.
2642 * In that case, throttling is done on the SyncTarget only.
2643 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002644 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002645 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002646 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002647 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002649submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002650 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002651
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002652submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002653 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002654 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002655 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002656 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002657
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002658 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002659 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002660
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002661 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002662 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002663 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002664 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002665 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002666 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2667
Philipp Reisnerb411b362009-09-25 16:07:19 -07002668out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002669 put_ldev(device);
2670 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002671 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002672}
2673
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002674/**
2675 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2676 */
2677static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002679 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002680 int self, peer, rv = -100;
2681 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002682 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002683
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002684 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2685 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002686
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002687 ch_peer = device->p_uuid[UI_SIZE];
2688 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002689
Philipp Reisner44ed1672011-04-19 17:10:19 +02002690 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002691 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002692 rcu_read_unlock();
2693 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002694 case ASB_CONSENSUS:
2695 case ASB_DISCARD_SECONDARY:
2696 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002697 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002698 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002699 break;
2700 case ASB_DISCONNECT:
2701 break;
2702 case ASB_DISCARD_YOUNGER_PRI:
2703 if (self == 0 && peer == 1) {
2704 rv = -1;
2705 break;
2706 }
2707 if (self == 1 && peer == 0) {
2708 rv = 1;
2709 break;
2710 }
2711 /* Else fall through to one of the other strategies... */
2712 case ASB_DISCARD_OLDER_PRI:
2713 if (self == 0 && peer == 1) {
2714 rv = 1;
2715 break;
2716 }
2717 if (self == 1 && peer == 0) {
2718 rv = -1;
2719 break;
2720 }
2721 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002722 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002723 "Using discard-least-changes instead\n");
2724 case ASB_DISCARD_ZERO_CHG:
2725 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002726 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002727 ? -1 : 1;
2728 break;
2729 } else {
2730 if (ch_peer == 0) { rv = 1; break; }
2731 if (ch_self == 0) { rv = -1; break; }
2732 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002733 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002734 break;
2735 case ASB_DISCARD_LEAST_CHG:
2736 if (ch_self < ch_peer)
2737 rv = -1;
2738 else if (ch_self > ch_peer)
2739 rv = 1;
2740 else /* ( ch_self == ch_peer ) */
2741 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002742 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002743 ? -1 : 1;
2744 break;
2745 case ASB_DISCARD_LOCAL:
2746 rv = -1;
2747 break;
2748 case ASB_DISCARD_REMOTE:
2749 rv = 1;
2750 }
2751
2752 return rv;
2753}
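
/* The drbd_asb_recover_*p() helpers follow the sign convention of the
 * drbd_uuid_compare() table below: rv == 1 keeps the local data (we
 * become sync source), rv == -1 discards it (we become sync target),
 * and rv == -100 means no automatic decision was possible. For
 * instance, ASB_DISCARD_LOCAL above yields -1, ASB_DISCARD_REMOTE 1. */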
2754
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002755/**
2756 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2757 */
2758static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002760 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002761 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002762 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763
Philipp Reisner44ed1672011-04-19 17:10:19 +02002764 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002765 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002766 rcu_read_unlock();
2767 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002768 case ASB_DISCARD_YOUNGER_PRI:
2769 case ASB_DISCARD_OLDER_PRI:
2770 case ASB_DISCARD_LEAST_CHG:
2771 case ASB_DISCARD_LOCAL:
2772 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002773 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002774 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002775 break;
2776 case ASB_DISCONNECT:
2777 break;
2778 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002779 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002780 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002781 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002782 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002783 rv = hg;
2784 break;
2785 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002786 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787 break;
2788 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002789 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002790 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002791 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002792 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002793 enum drbd_state_rv rv2;
2794
Philipp Reisnerb411b362009-09-25 16:07:19 -07002795 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2796 * we might be here in C_WF_REPORT_PARAMS which is transient.
2797 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002798 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002799 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002800 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002801 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002802 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803 rv = hg;
2804 }
2805 } else
2806 rv = hg;
2807 }
2808
2809 return rv;
2810}
2811
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002812/**
2813 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2814 */
2815static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002816{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002817 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002818 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002819 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820
Philipp Reisner44ed1672011-04-19 17:10:19 +02002821 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002822 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002823 rcu_read_unlock();
2824 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002825 case ASB_DISCARD_YOUNGER_PRI:
2826 case ASB_DISCARD_OLDER_PRI:
2827 case ASB_DISCARD_LEAST_CHG:
2828 case ASB_DISCARD_LOCAL:
2829 case ASB_DISCARD_REMOTE:
2830 case ASB_CONSENSUS:
2831 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002832 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002833 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002834 break;
2835 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002836 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002837 break;
2838 case ASB_DISCONNECT:
2839 break;
2840 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002841 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002842 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002843 enum drbd_state_rv rv2;
2844
Philipp Reisnerb411b362009-09-25 16:07:19 -07002845 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2846 * we might be here in C_WF_REPORT_PARAMS which is transient.
2847 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002848 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002849 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002850 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002851 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002852 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002853 rv = hg;
2854 }
2855 } else
2856 rv = hg;
2857 }
2858
2859 return rv;
2860}
2861
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002862static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863 u64 bits, u64 flags)
2864{
2865 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002866 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002867 return;
2868 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002869 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002870 text,
2871 (unsigned long long)uuid[UI_CURRENT],
2872 (unsigned long long)uuid[UI_BITMAP],
2873 (unsigned long long)uuid[UI_HISTORY_START],
2874 (unsigned long long)uuid[UI_HISTORY_END],
2875 (unsigned long long)bits,
2876 (unsigned long long)flags);
2877}
2878
2879/*
2880 100 after split brain try auto recover
2881 2 C_SYNC_SOURCE set BitMap
2882 1 C_SYNC_SOURCE use BitMap
2883 0 no Sync
2884 -1 C_SYNC_TARGET use BitMap
2885 -2 C_SYNC_TARGET set BitMap
2886 -100 after split brain, disconnect
2887-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002888-1091 requires proto 91
2889-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002890 */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002891static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002893 struct drbd_peer_device *const peer_device = first_peer_device(device);
2894 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895 u64 self, peer;
2896 int i, j;
2897
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002898 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2899 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900
2901 *rule_nr = 10;
2902 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2903 return 0;
2904
2905 *rule_nr = 20;
2906 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2907 peer != UUID_JUST_CREATED)
2908 return -2;
2909
2910 *rule_nr = 30;
2911 if (self != UUID_JUST_CREATED &&
2912 (peer == UUID_JUST_CREATED || peer == (u64)0))
2913 return 2;
2914
2915 if (self == peer) {
2916 int rct, dc; /* roles at crash time */
2917
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002918 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002920 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002921 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002922
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002923 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2924 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002925 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002926 drbd_uuid_move_history(device);
2927 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2928 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002930 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2931 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932 *rule_nr = 34;
2933 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002934 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002935 *rule_nr = 36;
2936 }
2937
2938 return 1;
2939 }
2940
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002941 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002942
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002943 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002944 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002945
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002946 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2947 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002948 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002949
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002950 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2951 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2952 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002954 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002955 *rule_nr = 35;
2956 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002957 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958 *rule_nr = 37;
2959 }
2960
2961 return -1;
2962 }
2963
2964 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002965 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2966 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002967 /* lowest bit is set when we were primary,
2968 * next bit (weight 2) is set when peer was primary */
2969 *rule_nr = 40;
2970
2971 switch (rct) {
2972 case 0: /* !self_pri && !peer_pri */ return 0;
2973 case 1: /* self_pri && !peer_pri */ return 1;
2974 case 2: /* !self_pri && peer_pri */ return -1;
2975 case 3: /* self_pri && peer_pri */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002976 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977 return dc ? -1 : 1;
2978 }
2979 }
2980
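	/* The remaining rules walk the UUID history of both nodes. A positive
	 * result makes this node the sync source, a negative one the sync
	 * target; |result| >= 2 forces a full sync, +-100 flags split brain,
	 * -1000 unrelated data (see drbd_sync_handshake()). */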
2981 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002982 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002983 if (self == peer)
2984 return -1;
2985
2986 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002987 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002988 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002989 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002990 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2991 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2992 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002993 /* The last P_SYNC_UUID did not get through. Undo the modifications of
 2994 the peer's UUIDs made at the last start of a resync with the peer as sync source. */
2995
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002996 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002997 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002998
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002999 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3000 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01003001
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003002 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003003 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01003004
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005 return -1;
3006 }
3007 }
3008
3009 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003010 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003012 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003013 if (self == peer)
3014 return -2;
3015 }
3016
3017 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003018 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3019 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003020 if (self == peer)
3021 return 1;
3022
3023 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003024 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003026 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003027 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3028 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3029 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003030 /* The last P_SYNC_UUID did not get through. Undo the modifications of
 3031 our UUIDs made at the last start of a resync with us as sync source. */
3032
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003033 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003034 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003035
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003036 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3037 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003038
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003039 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003040 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3041 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003042
3043 return 1;
3044 }
3045 }
3046
3047
3048 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003049 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003050 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003051 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003052 if (self == peer)
3053 return 2;
3054 }
3055
3056 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003057 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3058 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 if (self == peer && self != ((u64)0))
3060 return 100;
3061
3062 *rule_nr = 100;
3063 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003064 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003066 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003067 if (self == peer)
3068 return -100;
3069 }
3070 }
3071
3072 return -1000;
3073}
3074
3075/* drbd_sync_handshake() returns the new conn state on success, or
 3076 C_MASK on failure.
3077 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003078static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3079 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003080 enum drbd_disk_state peer_disk) __must_hold(local)
3081{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003082 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 enum drbd_conns rv = C_MASK;
3084 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003085 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003086 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003087
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003088 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003090 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003091
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003092 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003093
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003094 spin_lock_irq(&device->ldev->md.uuid_lock);
3095 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3096 drbd_uuid_dump(device, "peer", device->p_uuid,
3097 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003098
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003099 hg = drbd_uuid_compare(device, &rule_nr);
3100 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003101
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003102 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003103
3104 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003105 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106 return C_MASK;
3107 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003108 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003109 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110 return C_MASK;
3111 }
3112
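	/* If exactly one side has usable data (better than Inconsistent), that
	 * side becomes sync source regardless of the UUID verdict; an
	 * unresolved split brain (-100) or a full-sync verdict (|hg| == 2)
	 * keeps it a full sync. */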
3113 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3114 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3115 int f = (hg == -100) || abs(hg) == 2;
3116 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3117 if (f)
3118 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003119 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 hg > 0 ? "source" : "target");
3121 }
3122
Adam Gandelman3a11a482010-04-08 16:48:23 -07003123 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003124 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003125
Philipp Reisner44ed1672011-04-19 17:10:19 +02003126 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003127 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003128
3129 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003130 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131 + (peer_role == R_PRIMARY);
3132 int forced = (hg == -100);
3133
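		/* Pick the automatic recovery policy by the number of nodes
		 * that are currently Primary: after-sb-0pri/-1pri/-2pri. */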
3134 switch (pcount) {
3135 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003136 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137 break;
3138 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003139 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140 break;
3141 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003142 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143 break;
3144 }
3145 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003146 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147 "automatically resolved. Sync from %s node\n",
3148 pcount, (hg < 0) ? "peer" : "this");
3149 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003150 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151 " UUIDs were ambiguous.\n");
3152 hg = hg*2;
3153 }
3154 }
3155 }
3156
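	/* Still undecided: honor an explicit discard-my-data. UI_FLAGS bit 0
	 * carries the peer's discard-my-data setting; this only helps if
	 * exactly one side has it set, otherwise hg stays -100. */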
3157 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003158 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003160 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003161 hg = 1;
3162
3163 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003164 drbd_warn(device, "Split-Brain detected, manually resolved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165 "Sync from %s node\n",
3166 (hg < 0) ? "peer" : "this");
3167 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003168 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003169 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003170 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003171
3172 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003173 /* FIXME this log message is not correct if we end up here
3174 * after an attempted attach on a diskless node.
3175 * We just refuse to attach -- well, we drop the "connection"
3176 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003177 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003178 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003179 return C_MASK;
3180 }
3181
3182 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003183 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003184 return C_MASK;
3185 }
3186
3187 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003188 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003189 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003190 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003191 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003192 /* fall through */
3193 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003194 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003195 return C_MASK;
3196 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003197 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198 " assumption\n");
3199 }
3200 }
3201
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003202 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003203 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003204 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003205 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003206 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003207 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3208 abs(hg) >= 2 ? "full" : "bit-map based");
3209 return C_MASK;
3210 }
3211
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003213 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003214 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003215 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003216 return C_MASK;
3217 }
3218
3219 if (hg > 0) { /* become sync source. */
3220 rv = C_WF_BITMAP_S;
3221 } else if (hg < 0) { /* become sync target */
3222 rv = C_WF_BITMAP_T;
3223 } else {
3224 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003225 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003226 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003227 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003228 }
3229 }
3230
3231 return rv;
3232}
3233
Philipp Reisnerf179d762011-05-16 17:31:47 +02003234static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235{
3236 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003237 if (peer == ASB_DISCARD_REMOTE)
3238 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003239
3240 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003241 if (peer == ASB_DISCARD_LOCAL)
3242 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003243
3244 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003245 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003246}
3247
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003248static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003249{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003250 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003251 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3252 int p_proto, p_discard_my_data, p_two_primaries, cf;
3253 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3254 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003255 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003256 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003257
Philipp Reisnerb411b362009-09-25 16:07:19 -07003258 p_proto = be32_to_cpu(p->protocol);
3259 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3260 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3261 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003262 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003263 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003264 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003265
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003266 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003267 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003268
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003269 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003270 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003271 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003272 if (err)
3273 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003274 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003275 }
3276
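	/* A plain P_PROTOCOL is part of the connection handshake and must match
	 * our local configuration; P_PROTOCOL_UPDATE announces a deliberate
	 * change of settings, which we adopt below, so the compatibility
	 * checks are skipped. */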
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003277 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003278 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003279
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003280 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003281 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003282
3283 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003284 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003285
3286 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003287 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003288 goto disconnect_rcu_unlock;
3289 }
3290
3291 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003292 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003293 goto disconnect_rcu_unlock;
3294 }
3295
3296 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003297 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003298 goto disconnect_rcu_unlock;
3299 }
3300
3301 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003302 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003303 goto disconnect_rcu_unlock;
3304 }
3305
3306 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003307 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003308 goto disconnect_rcu_unlock;
3309 }
3310
3311 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003312 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003313 goto disconnect_rcu_unlock;
3314 }
3315
3316 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003317 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003318 goto disconnect_rcu_unlock;
3319 }
3320
3321 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003322 }
3323
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003324 if (integrity_alg[0]) {
3325 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003326
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003327 /*
3328 * We can only change the peer data integrity algorithm
3329 * here. Changing our own data integrity algorithm
3330 * requires that we send a P_PROTOCOL_UPDATE packet at
 3331 * the same time; otherwise, the peer cannot
 3332 * tell at which point in the packet stream
 3333 * the algorithm changes.
3334 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003335
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003336 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3337 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003338 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003339 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003340 goto disconnect;
3341 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003342
3343 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3344 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3345 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3346 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003347 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003348 goto disconnect;
3349 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003350 }
3351
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003352 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3353 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003354 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003355 goto disconnect;
3356 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003357
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003358 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003359 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003360 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003361 *new_net_conf = *old_net_conf;
3362
3363 new_net_conf->wire_protocol = p_proto;
3364 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3365 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3366 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3367 new_net_conf->two_primaries = p_two_primaries;
3368
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003369 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003370 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003371 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003372
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003373 crypto_free_hash(connection->peer_integrity_tfm);
3374 kfree(connection->int_dig_in);
3375 kfree(connection->int_dig_vv);
3376 connection->peer_integrity_tfm = peer_integrity_tfm;
3377 connection->int_dig_in = int_dig_in;
3378 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003379
3380 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003381 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003382 integrity_alg[0] ? integrity_alg : "(none)");
3383
3384 synchronize_rcu();
3385 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003386 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387
Philipp Reisner44ed1672011-04-19 17:10:19 +02003388disconnect_rcu_unlock:
3389 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003391 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003392 kfree(int_dig_in);
3393 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003394 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003395 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003396}
3397
3398/* helper function
3399 * input: alg name, feature name
3400 * return: NULL (alg name was "")
3401 * ERR_PTR(error) if something goes wrong
3402 * or the crypto hash ptr, if it worked out ok. */
Lars Ellenberg8ce953a2014-02-27 09:46:18 +01003403static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003404 const char *alg, const char *name)
3405{
3406 struct crypto_hash *tfm;
3407
3408 if (!alg[0])
3409 return NULL;
3410
3411 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3412 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003413 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003414 alg, name, PTR_ERR(tfm));
3415 return tfm;
3416 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003417 return tfm;
3418}
3419
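/* Drain the remaining payload of the current packet into the connection's
 * receive buffer and discard it. */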
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003420static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003421{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003422 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003423 int size = pi->size;
3424
3425 while (size) {
3426 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003427 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003428 if (s <= 0) {
3429 if (s < 0)
3430 return s;
3431 break;
3432 }
3433 size -= s;
3434 }
3435 if (size)
3436 return -EIO;
3437 return 0;
3438}
3439
3440/*
3441 * config_unknown_volume - device configuration command for unknown volume
3442 *
3443 * When a device is added to an existing connection, the node on which the
3444 * device is added first will send configuration commands to its peer but the
3445 * peer will not know about the device yet. It will warn and ignore these
3446 * commands. Once the device is added on the second node, the second node will
3447 * send the same device configuration commands, but in the other direction.
3448 *
3449 * (We can also end up here if drbd is misconfigured.)
3450 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003451static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003452{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003453 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003454 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003455 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003456}
3457
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003458static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003459{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003460 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003461 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003462 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003463 unsigned int header_size, data_size, exp_max_sz;
3464 struct crypto_hash *verify_tfm = NULL;
3465 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003466 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003467 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003468 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003469 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003470 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003471 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003472
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003473 peer_device = conn_peer_device(connection, pi->vnr);
3474 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003475 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003476 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003477
3478 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3479 : apv == 88 ? sizeof(struct p_rs_param)
3480 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003481 : apv <= 94 ? sizeof(struct p_rs_param_89)
3482 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003483
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003484 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003485 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003486 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003487 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003488 }
3489
3490 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003491 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003492 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003493 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003494 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003495 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003496 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003497 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003498 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003499 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003500 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003501 }
3502
3503 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003504 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003505 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3506
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003507 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003508 if (err)
3509 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003510
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003511 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003512 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003513 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003514 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3515 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003516 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003517 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003518 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003519 return -ENOMEM;
3520 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003521
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003522 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003523 *new_disk_conf = *old_disk_conf;
3524
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003525 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003526 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527
3528 if (apv >= 88) {
3529 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003530 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003531 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003532 "peer wants %u, accepting only up to %u bytes\n",
3533 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003534 err = -EIO;
3535 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536 }
3537
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003538 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003539 if (err)
3540 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003541 /* we expect NUL terminated string */
3542 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003543 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003544 p->verify_alg[data_size-1] = 0;
3545
3546 } else /* apv >= 89 */ {
3547 /* we still expect NUL terminated strings */
3548 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003549 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3550 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003551 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3552 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3553 }
3554
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003555 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003556 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003557 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003558 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003559 goto disconnect;
3560 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003561 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003562 p->verify_alg, "verify-alg");
3563 if (IS_ERR(verify_tfm)) {
3564 verify_tfm = NULL;
3565 goto disconnect;
3566 }
3567 }
3568
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003569 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003570 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003571 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003572 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003573 goto disconnect;
3574 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003575 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576 p->csums_alg, "csums-alg");
3577 if (IS_ERR(csums_tfm)) {
3578 csums_tfm = NULL;
3579 goto disconnect;
3580 }
3581 }
3582
Philipp Reisner813472c2011-05-03 16:47:02 +02003583 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003584 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3585 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3586 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3587 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003588
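			/* Resize the resync-planning fifo if the requested
			 * planning depth (c_plan_ahead) changed. */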
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003589 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003590 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003591 new_plan = fifo_alloc(fifo_size);
3592 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003593 drbd_err(device, "kmalloc of fifo_buffer failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003594 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003595 goto disconnect;
3596 }
3597 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003598 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003599
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003600 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003601 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3602 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003603 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003604 goto disconnect;
3605 }
3606
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003607 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003608
3609 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003610 strcpy(new_net_conf->verify_alg, p->verify_alg);
3611 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003612 crypto_free_hash(peer_device->connection->verify_tfm);
3613 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003614 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003615 }
3616 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003617 strcpy(new_net_conf->csums_alg, p->csums_alg);
3618 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003619 crypto_free_hash(peer_device->connection->csums_tfm);
3620 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003621 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003622 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003623 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003624 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003625 }
3626
Philipp Reisner813472c2011-05-03 16:47:02 +02003627 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003628 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3629 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003630 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003631
3632 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003633 old_plan = device->rs_plan_s;
3634 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003635 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003636
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003637 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003638 synchronize_rcu();
3639 if (new_net_conf)
3640 kfree(old_net_conf);
3641 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003642 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003643
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003644 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645
Philipp Reisner813472c2011-05-03 16:47:02 +02003646reconnect:
3647 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003648 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003649 kfree(new_disk_conf);
3650 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003651 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003652 return -EIO;
3653
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003655 kfree(new_plan);
3656 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003657 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003658 kfree(new_disk_conf);
3659 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003660 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 /* just for completeness: actually not needed,
3662 * as this is not reached if csums_tfm was ok. */
3663 crypto_free_hash(csums_tfm);
3664 /* but free the verify_tfm again, if csums_tfm did not work out */
3665 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003666 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003667 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003668}
3669
Philipp Reisnerb411b362009-09-25 16:07:19 -07003670/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003671static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672 const char *s, sector_t a, sector_t b)
3673{
3674 sector_t d;
3675 if (a == 0 || b == 0)
3676 return;
3677 d = (a > b) ? (a - b) : (b - a);
3678 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003679 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003680 (unsigned long long)a, (unsigned long long)b);
3681}
3682
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003683static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003685 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003686 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003687 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003688 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003689 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003690 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003691 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003692
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003693 peer_device = conn_peer_device(connection, pi->vnr);
3694 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003695 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003696 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003697
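	/* d_size: size of the peer's backing device, u_size: the user-configured
	 * size limit on the peer, c_size: the capacity the peer currently
	 * exposes. */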
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698 p_size = be64_to_cpu(p->d_size);
3699 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003700 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003701
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702 /* just store the peer's disk size for now.
3703 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003704 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003706 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003707 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003708 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003709 rcu_read_unlock();
3710
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003711 warn_if_differ_considerably(device, "lower level device sizes",
3712 p_size, drbd_get_max_capacity(device->ldev));
3713 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003714 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003715
3716 /* if this is the first connect, or an otherwise expected
3717 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003718 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003719 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720
3721 /* Never shrink a device with usable data during connect.
3722 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003723 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3724 drbd_get_capacity(device->this_bdev) &&
3725 device->state.disk >= D_OUTDATED &&
3726 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003727 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003728 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003729 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003730 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003732
3733 if (my_usize != p_usize) {
3734 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3735
3736 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3737 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003738 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003739 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003740 return -ENOMEM;
3741 }
3742
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003743 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003744 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003745 *new_disk_conf = *old_disk_conf;
3746 new_disk_conf->disk_size = p_usize;
3747
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003748 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003749 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003750 synchronize_rcu();
3751 kfree(old_disk_conf);
3752
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003753 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003754 (unsigned long)p_usize);
3755 }
3756
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003757 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003758 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003759
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003760 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003761 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3762 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3763 drbd_reconsider_max_bio_size(), we can be sure that after
3764 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3765
Philipp Reisnere89b5912010-03-24 17:11:33 +01003766 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003767 if (get_ldev(device)) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003768 drbd_reconsider_max_bio_size(device, device->ldev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003769 dd = drbd_determine_dev_size(device, ddsf, NULL);
3770 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003771 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003772 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003773 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003774 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003775 /*
3776 * I am diskless, need to accept the peer's *current* size.
 3777 * I must NOT accept the peer's backing disk size,
3778 * it may have been larger than mine all along...
3779 *
3780 * At this point, the peer knows more about my disk, or at
3781 * least about what we last agreed upon, than myself.
3782 * So if his c_size is less than his d_size, the most likely
3783 * reason is that *my* d_size was smaller last time we checked.
3784 *
3785 * However, if he sends a zero current size,
 3786 * take his (user-capped or) backing disk size anyway.
3787 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003788 drbd_reconsider_max_bio_size(device, NULL);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003789 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003790 }
3791
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003792 if (get_ldev(device)) {
3793 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3794 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795 ldsc = 1;
3796 }
3797
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003798 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003799 }
3800
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003801 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003803 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804 /* we have different sizes, probably peer
3805 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003806 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003807 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003808 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3809 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3810 if (device->state.pdsk >= D_INCONSISTENT &&
3811 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003812 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003813 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003814 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003815 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003816 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003817 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818 }
3819 }
3820
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003821 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822}
3823
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003824static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003826 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003827 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003828 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003829 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003830 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003832 peer_device = conn_peer_device(connection, pi->vnr);
3833 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003834 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003835 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003836
Philipp Reisnerb411b362009-09-25 16:07:19 -07003837 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003838 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003839 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003840 return -ENOMEM;
3841 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003842
3843 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3844 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3845
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003846 kfree(device->p_uuid);
3847 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003849 if (device->state.conn < C_CONNECTED &&
3850 device->state.disk < D_INCONSISTENT &&
3851 device->state.role == R_PRIMARY &&
3852 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003853 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003854 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003855 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003856 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003857 }
3858
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003859 if (get_ldev(device)) {
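		/* Skip the initial full sync only for a fresh pair: cleanly
		 * connected (apv >= 90), our current UUID still UUID_JUST_CREATED,
		 * and the peer signals the same readiness via UI_FLAGS bit 3
		 * (value 8). */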
Philipp Reisnerb411b362009-09-25 16:07:19 -07003860 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003861 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003862 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003863 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003864 (p_uuid[UI_FLAGS] & 8);
3865 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003866 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003867 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003868 "clear_n_write from receive_uuids",
3869 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003870 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3871 _drbd_uuid_set(device, UI_BITMAP, 0);
3872 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003874 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003875 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003876 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003877 put_ldev(device);
3878 } else if (device->state.disk < D_INCONSISTENT &&
3879 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003880 /* I am a diskless primary, the peer just created a new current UUID
3881 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003882 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003883 }
3884
 3885 /* Before we test the disk state, wait until a possibly ongoing
 3886 cluster-wide state change has finished. That is important if we
 3887 are primary and are detaching from our disk: we need to see the
 3888 new disk state... */
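	/* Taking and immediately releasing state_mutex acts as a barrier: it
	 * blocks until a state change currently holding the mutex is done. */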
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003889 mutex_lock(device->state_mutex);
3890 mutex_unlock(device->state_mutex);
3891 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3892 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003893
3894 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003895 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003896
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003897 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003898}
3899
3900/**
3901 * convert_state() - Converts the peer's view of the cluster state to our point of view
3902 * @ps: The state as seen by the peer.
3903 */
3904static union drbd_state convert_state(union drbd_state ps)
3905{
3906 union drbd_state ms;
3907
3908 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003909 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003910 [C_CONNECTED] = C_CONNECTED,
3911
3912 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3913 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3914 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3915 [C_VERIFY_S] = C_VERIFY_T,
3916 [C_MASK] = C_MASK,
3917 };
3918
3919 ms.i = ps.i;
3920
3921 ms.conn = c_tab[ps.conn];
3922 ms.peer = ps.role;
3923 ms.role = ps.peer;
3924 ms.pdsk = ps.disk;
3925 ms.disk = ps.pdsk;
3926 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3927
3928 return ms;
3929}
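/* Worked example (editorial; the particular states are just an
 * illustration): if the peer reports ps = { .conn = C_STARTING_SYNC_S,
 * .role = R_PRIMARY, .peer = R_SECONDARY, .disk = D_UP_TO_DATE,
 * .pdsk = D_INCONSISTENT }, convert_state() mirrors that into
 * ms = { .conn = C_STARTING_SYNC_T, .role = R_SECONDARY,
 * .peer = R_PRIMARY, .disk = D_INCONSISTENT, .pdsk = D_UP_TO_DATE }:
 * role/peer and disk/pdsk swap sides, and the connection state is
 * mapped through c_tab[]. */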
3930
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003931static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003933 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003934 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003935 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003936 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003937 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003939 peer_device = conn_peer_device(connection, pi->vnr);
3940 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003941 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003942 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003943
Philipp Reisnerb411b362009-09-25 16:07:19 -07003944 mask.i = be32_to_cpu(p->mask);
3945 val.i = be32_to_cpu(p->val);
3946
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003947 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003948 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003949 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003950 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951 }
3952
3953 mask = convert_state(mask);
3954 val = convert_state(val);
3955
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003956 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003957 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003958
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003959 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003960
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003961 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003962}
3963
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003964static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003965{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003966 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003967 union drbd_state mask, val;
3968 enum drbd_state_rv rv;
3969
3970 mask.i = be32_to_cpu(p->mask);
3971 val.i = be32_to_cpu(p->val);
3972
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003973 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3974 mutex_is_locked(&connection->cstate_mutex)) {
3975 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003976 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003977 }
3978
3979 mask = convert_state(mask);
3980 val = convert_state(val);
3981
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003982 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3983 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003984
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003985 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003986}
3987
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003988static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003990 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003991 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003992 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003993 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003994 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003995 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003996 int rv;
3997
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003998 peer_device = conn_peer_device(connection, pi->vnr);
3999 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004000 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004001 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004002
Philipp Reisnerb411b362009-09-25 16:07:19 -07004003 peer_state.i = be32_to_cpu(p->state);
4004
4005 real_peer_disk = peer_state.disk;
4006 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004007 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004008 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004009 }
4010
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004011 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004012 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004013 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004014 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004015
Lars Ellenberg545752d2011-12-05 14:39:25 +01004016 /* If some other part of the code (asender thread, timeout)
4017 * already decided to close the connection again,
4018 * we must not "re-establish" it here. */
4019 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004020 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01004021
Lars Ellenberg40424e42011-09-26 15:24:56 +02004022 /* If this is the "end of sync" confirmation, usually the peer disk
4023 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For an empty (0 bits
4024 * set) resync started in PausedSyncT, or if the timing of pause-/
4025 * unpause-sync events has been "just right", the peer disk may
4026 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
4027 */
4028 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
4029 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004030 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4031 /* If we are (becoming) SyncSource, but peer is still in sync
4032 * preparation, ignore its uptodate-ness to avoid flapping, it
4033 * will change to inconsistent once the peer reaches active
4034 * syncing states.
4035 * It may have changed syncer-paused flags, however, so we
4036 * cannot ignore this completely. */
4037 if (peer_state.conn > C_CONNECTED &&
4038 peer_state.conn < C_SYNC_SOURCE)
4039 real_peer_disk = D_INCONSISTENT;
4040
4041 /* if peer_state changes to connected at the same time,
4042 * it explicitly notifies us that it finished resync.
4043 * Maybe we should finish it up, too? */
4044 else if (os.conn >= C_SYNC_SOURCE &&
4045 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004046 if (drbd_bm_total_weight(device) <= device->rs_failed)
4047 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004048 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004049 }
4050 }
4051
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004052 /* explicit verify finished notification, stop sector reached. */
4053 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4054 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004055 ov_out_of_sync_print(device);
4056 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004057 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004058 }
4059
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004060 /* peer says his disk is inconsistent, while we think it is uptodate,
4061 * and this happens while the peer still thinks we have a sync going on,
4062 * but we think we are already done with the sync.
4063 * We ignore this to avoid flapping pdsk.
4064 * This should not happen if the peer is a recent version of drbd. */
4065 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4066 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4067 real_peer_disk = D_UP_TO_DATE;
4068
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004069 if (ns.conn == C_WF_REPORT_PARAMS)
4070 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004071
Philipp Reisner67531712010-10-27 12:21:30 +02004072 if (peer_state.conn == C_AHEAD)
4073 ns.conn = C_BEHIND;
4074
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004075 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4076 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004077 int cr; /* consider resync */
4078
4079 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004080 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081 /* if we had an established connection
4082 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004083 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004084 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004085 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086 /* if we have both been inconsistent, and the peer has been
4087 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004088 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089 /* if we had been plain connected, and the admin requested to
4090 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004091 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 (peer_state.conn >= C_STARTING_SYNC_S &&
4093 peer_state.conn <= C_WF_BITMAP_T));
4094
4095 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004096 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004097
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004098 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004099 if (ns.conn == C_MASK) {
4100 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004101 if (device->state.disk == D_NEGOTIATING) {
4102 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004104 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004105 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004106 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004107 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004108 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004109 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004110 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004111 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004112 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113 }
4114 }
4115 }
4116
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004117 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004118 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004119 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004120 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121 ns.peer = peer_state.role;
4122 ns.pdsk = real_peer_disk;
4123 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004124 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004125 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004126 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004127 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4128 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004129 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004130 for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004131 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004132 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004133 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004134 drbd_uuid_new_current(device);
4135 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004136 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004137 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004138 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004139 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4140 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004141 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004142
4143 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004144 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004145 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004146 }
4147
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004148 if (os.conn > C_WF_REPORT_PARAMS) {
4149 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004150 peer_state.disk != D_NEGOTIATING ) {
4151 /* we want resync, peer has not yet decided to sync... */
4152 /* Nowadays only used when forcing a node into primary role and
4153 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004154 drbd_send_uuids(peer_device);
4155 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156 }
4157 }
4158
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004159 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004161 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004162
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004163 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004164}
4165
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004166static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004168 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004169 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004170 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004171
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004172 peer_device = conn_peer_device(connection, pi->vnr);
4173 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004174 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004175 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004176
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004177 wait_event(device->misc_wait,
4178 device->state.conn == C_WF_SYNC_UUID ||
4179 device->state.conn == C_BEHIND ||
4180 device->state.conn < C_CONNECTED ||
4181 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004182
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004183 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004184
Philipp Reisnerb411b362009-09-25 16:07:19 -07004185 /* Here the _drbd_uuid_ functions are right, current should
4186 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004187 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4188 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4189 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004190
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004191 drbd_print_uuids(device, "updated sync uuid");
4192 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004194 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004195 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004196 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004197
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004198 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004199}
4200
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004201/**
4202 * receive_bitmap_plain
4203 *
4204 * Return 0 when done, 1 when another iteration is needed, and a negative error
4205 * code upon failure.
4206 */
4207static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004208receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004209 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004211 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004212 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004213 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004214 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004215 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004216 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004217
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004218 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004219 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004220 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221 }
4222 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004223 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004224 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004225 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004226 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004227
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004228 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004229
4230 c->word_offset += num_words;
4231 c->bit_offset = c->word_offset * BITS_PER_LONG;
4232 if (c->bit_offset > c->bm_bits)
4233 c->bit_offset = c->bm_bits;
4234
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004235 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004236}
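/* Sizing sketch (editorial; assumes DRBD_SOCKET_BUFFER_SIZE == 4096,
 * an 8 byte header and a 64 bit host): data_size = 4096 - 8 = 4088,
 * so num_words = min(4088 / 8, remaining words) = 511 longs per full
 * packet and want = 511 * 8 = 4088, which must equal pi->size exactly;
 * only the last packet of a bitmap transfer may be shorter. */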
4237
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004238static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4239{
4240 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4241}
4242
4243static int dcbp_get_start(struct p_compressed_bm *p)
4244{
4245 return (p->encoding & 0x80) != 0;
4246}
4247
4248static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4249{
4250 return (p->encoding >> 4) & 0x7;
4251}
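/* Editorial diagram, inferred from the masks and shifts above: the
 * on-wire "encoding" byte of p_compressed_bm is laid out as
 *
 *    bit  7    6 5 4       3 2 1 0
 *       +-----+----------+--------+
 *       |start| pad_bits |  code  |
 *       +-----+----------+--------+
 *
 * where "code" selects the enum drbd_bitmap_code variant, pad_bits is
 * the number of padding bits that terminate the VLI bitstream, and
 * start is the value (set/clear) of the first run. */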
4252
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004253/**
4254 * recv_bm_rle_bits
4255 *
4256 * Return 0 when done, 1 when another iteration is needed, and a negative error
4257 * code upon failure.
4258 */
4259static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004260recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004261 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004262 struct bm_xfer_ctx *c,
4263 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264{
4265 struct bitstream bs;
4266 u64 look_ahead;
4267 u64 rl;
4268 u64 tmp;
4269 unsigned long s = c->bit_offset;
4270 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004271 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272 int have;
4273 int bits;
4274
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004275 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004276
4277 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4278 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004279 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004280
4281 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4282 bits = vli_decode_bits(&rl, look_ahead);
4283 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004284 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285
4286 if (toggle) {
4287 e = s + rl -1;
4288 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004289 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004290 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004291 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004292 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293 }
4294
4295 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004296 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 have, bits, look_ahead,
4298 (unsigned int)(bs.cur.b - p->code),
4299 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004300 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004301 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004302 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4303 if (likely(bits < 64))
4304 look_ahead >>= bits;
4305 else
4306 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004307 have -= bits;
4308
4309 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4310 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004311 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004312 look_ahead |= tmp << have;
4313 have += bits;
4314 }
4315
4316 c->bit_offset = s;
4317 bm_xfer_ctx_bit_to_word_offset(c);
4318
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004319 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320}
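/* Worked example (editorial; run lengths invented): with
 * dcbp_get_start(p) == 0 and decoded run lengths 5, 3, 7, the loop
 * visits s = 0 (toggle 0: bits 0..4 stay clear), s = 5 (toggle 1:
 * _drbd_bm_set_bits() marks bits 5..7), s = 8 (toggle 0: bits 8..14
 * stay clear) and ends with c->bit_offset = 15. Only "set" runs touch
 * the bitmap; "clear" runs merely advance the offset. The function
 * then returns 1 as long as 15 < c->bm_bits, requesting more data. */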
4321
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004322/**
4323 * decode_bitmap_c
4324 *
4325 * Return 0 when done, 1 when another iteration is needed, and a negative error
4326 * code upon failure.
4327 */
4328static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004329decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004331 struct bm_xfer_ctx *c,
4332 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004334 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004335 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004336
4337 /* other variants had been implemented for evaluation,
4338 * but have been dropped as this one turned out to be "best"
4339 * during all our tests. */
4340
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004341 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4342 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004343 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344}
4345
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004346void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004347 const char *direction, struct bm_xfer_ctx *c)
4348{
4349 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004350 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004351 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4352 unsigned int plain =
4353 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4354 c->bm_words * sizeof(unsigned long);
4355 unsigned int total = c->bytes[0] + c->bytes[1];
4356 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004357
4358 /* total cannot be zero, but just in case: */
4359 if (total == 0)
4360 return;
4361
4362 /* don't report if not compressed */
4363 if (total >= plain)
4364 return;
4365
4366 /* total < plain. check for overflow, still */
4367 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4368 : (1000 * total / plain);
4369
4370 if (r > 1000)
4371 r = 1000;
4372
4373 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004374 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375 "total %u; compression: %u.%u%%\n",
4376 direction,
4377 c->bytes[1], c->packets[1],
4378 c->bytes[0], c->packets[0],
4379 total, r/10, r % 10);
4380}
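/* Arithmetic sketch for the per-mille math above (editorial; numbers
 * invented): with plain = 1,000,000 bytes for an uncompressed transfer
 * and total = 61,500 bytes actually sent, r = 1000 * 61,500 / 1,000,000
 * = 61, so 1000 - r = 939 is printed as "compression: 93.9%". The
 * total > UINT_MAX/1000 branch computes the same ratio with the
 * division done first, to keep the 32 bit multiplication from
 * overflowing. */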
4381
4382/* Since we are processing the bitfield from lower addresses to higher,
4383 it does not matter whether we process it in 32 bit chunks or 64 bit
4384 chunks as long as it is little endian. (Understand it as a byte stream,
4385 beginning with the lowest byte...) If we used big endian,
4386 we would need to process it from the highest address to the lowest,
4387 in order to be agnostic to the 32 vs 64 bits issue.
4388
4389 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004390static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004392 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004393 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004394 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004395 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004396
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004397 peer_device = conn_peer_device(connection, pi->vnr);
4398 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004399 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004400 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004401
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004402 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004403 /* you are supposed to send additional out-of-sync information
4404 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004405
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004407 .bm_bits = drbd_bm_bits(device),
4408 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409 };
4410
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004411 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004412 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004413 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004414 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415 /* MAYBE: sanity check that we speak proto >= 90,
4416 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004417 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004419 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004420 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004421 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422 goto out;
4423 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004424 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004425 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004426 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004427 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004428 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004429 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004430 if (err)
4431 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004432 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004434 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004435 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436 goto out;
4437 }
4438
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004439 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004440 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004442 if (err <= 0) {
4443 if (err < 0)
4444 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004445 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004446 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004447 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004448 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004450 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004451
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004452 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004453
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004454 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004455 enum drbd_state_rv rv;
4456
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004457 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004458 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004459 goto out;
4460 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004461 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004462 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004463 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004464 /* admin may have requested C_DISCONNECTING,
4465 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004466 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004467 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004469 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004470
Philipp Reisnerb411b362009-09-25 16:07:19 -07004471 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004472 drbd_bm_unlock(device);
4473 if (!err && device->state.conn == C_WF_BITMAP_S)
4474 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004475 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004476}
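/* Endianness example for the comment preceding receive_bitmap()
 * (editorial): the byte sequence 01 00 00 00 80 00 00 00, read little
 * endian, yields bits 0 and 39 whether it is taken as one 64 bit word
 * or as two 32 bit words (bit 0 of word 0, bit 7 of word 1, and
 * 32 + 7 = 39). The bit numbering within the stream is identical, so
 * the chunk size does not matter. */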
4477
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004478static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004479{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004480 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004481 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004482
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004483 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004484}
4485
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004486static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004488 /* Make sure we've acked all the TCP data associated
4489 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004490 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004491
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004492 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004493}
4494
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004495static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004496{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004497 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004498 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004499 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004500
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004501 peer_device = conn_peer_device(connection, pi->vnr);
4502 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004503 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004504 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004505
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004506 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004507 case C_WF_SYNC_UUID:
4508 case C_WF_BITMAP_T:
4509 case C_BEHIND:
4510 break;
4511 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004512 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004513 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004514 }
4515
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004516 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004517
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004518 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004519}
4520
Philipp Reisner02918be2010-08-20 14:35:10 +02004521struct data_cmd {
4522 int expect_payload;
4523 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004524 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004525};
4526
Philipp Reisner02918be2010-08-20 14:35:10 +02004527static struct data_cmd drbd_cmd_handler[] = {
4528 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4529 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4530 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4531 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004532 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4533 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4534 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004535 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4536 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004537 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4538 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004539 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4540 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4541 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4542 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4543 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4544 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4545 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4546 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4547 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4548 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004549 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004550 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004551 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004552 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004553};
4554
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004555static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004556{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004557 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004558 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004559 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004560
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004561 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004562 struct data_cmd *cmd;
4563
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004564 drbd_thread_current_set_cpu(&connection->receiver);
4565 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004566 goto err_out;
4567
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004568 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004569 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004570 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004571 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004572 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004573 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004574
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004575 shs = cmd->pkt_size;
4576 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004577 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004578 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004579 goto err_out;
4580 }
4581
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004582 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004583 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004584 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004585 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004586 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004587 }
4588
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004589 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004590 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004591 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004592 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004593 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004594 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004596 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004597
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004598 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004599 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004600}
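/* Receive loop contract, condensed (editorial summary): each iteration
 * reads one packet header, looks the command up in drbd_cmd_handler[],
 * rejects unknown commands and unexpected payloads, pulls in the fixed
 * size sub header (cmd->pkt_size bytes), and hands the remainder of
 * the packet to cmd->fn(). Any failure escalates to C_PROTOCOL_ERROR:
 * the stream is torn down rather than resynchronized. */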
4601
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004602static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004603{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004604 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004605 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004606 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004607
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004608 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004609 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004610
Lars Ellenberg545752d2011-12-05 14:39:25 +01004611 /* We are about to start the cleanup after connection loss.
4612 * Make sure drbd_make_request knows about that.
4613 * Usually we should be in some network failure state already,
4614 * but just in case we are not, we fix it up here.
4615 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004616 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004617
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004619 drbd_thread_stop(&connection->asender);
4620 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004621
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004622 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004623 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4624 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004625 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004626 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004627 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004628 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004629 rcu_read_lock();
4630 }
4631 rcu_read_unlock();
4632
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004633 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004634 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004635 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004636 atomic_set(&connection->current_epoch->epoch_size, 0);
4637 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004638
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004639 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004640
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004641 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4642 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004643
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004644 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004645 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004646 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004647 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004648
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004649 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004650
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004651 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004652 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004653}
4654
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004655static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004656{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004657 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004658 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004659
Philipp Reisner85719572010-07-21 10:20:17 +02004660 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004661 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004662 _drbd_wait_ee_list_empty(device, &device->active_ee);
4663 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4664 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004665 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004666
4667 /* We do not have data structures that would allow us to
4668 * get the rs_pending_cnt down to 0 again.
4669 * * On C_SYNC_TARGET we do not have any data structures describing
4670 * the pending RSDataRequest's we have sent.
4671 * * On C_SYNC_SOURCE there is no data structure that tracks
4672 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4673 * And no, it is not the sum of the reference counts in the
4674 * resync_LRU. The resync_LRU tracks the whole operation including
4675 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4676 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004677 drbd_rs_cancel_all(device);
4678 device->rs_total = 0;
4679 device->rs_failed = 0;
4680 atomic_set(&device->rs_pending_cnt, 0);
4681 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004682
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004683 del_timer_sync(&device->resync_timer);
4684 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4687 * w_make_resync_request etc. which may still be on the worker queue
4688 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004689 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004690
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004691 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004692
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004693 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4694 might have queued work again. The one before drbd_finish_peer_reqs() is
4695 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004696 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004697
Lars Ellenberg08332d72012-08-17 15:09:13 +02004698 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4699 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004700 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004701
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004702 kfree(device->p_uuid);
4703 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004704
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004705 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004706 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004707
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004708 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004709
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004710 /* serialize with bitmap writeout triggered by the state change,
4711 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004712 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004713
Philipp Reisnerb411b362009-09-25 16:07:19 -07004714 /* tcp_close and release of sendpage pages can be deferred. I don't
4715 * want to use SO_LINGER, because apparently it can be deferred for
4716 * more than 20 seconds (longest time I checked).
4717 *
4718 * Actually we don't care for exactly when the network stack does its
4719 * put_page(), but release our reference on these pages right here.
4720 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004721 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004722 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004723 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004724 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004725 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004726 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004727 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004728 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004729 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004730
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004731 D_ASSERT(device, list_empty(&device->read_ee));
4732 D_ASSERT(device, list_empty(&device->active_ee));
4733 D_ASSERT(device, list_empty(&device->sync_ee));
4734 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735
Philipp Reisner360cc742011-02-08 14:29:53 +01004736 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004737}
4738
4739/*
4740 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4741 * we can agree on is stored in agreed_pro_version.
4742 *
4743 * feature flags and the reserved array should leave enough room for future
4744 * enhancements of the handshake protocol, and possible plugins...
4745 *
4746 * for now, they are expected to be zero, but ignored.
4747 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004748static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004749{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004750 struct drbd_socket *sock;
4751 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004752
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004753 sock = &connection->data;
4754 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004755 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004756 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004757 memset(p, 0, sizeof(*p));
4758 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4759 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004760 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004761 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004762}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
		  "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
		 "I support %d-%d, peer supports %d-%d\n",
		 PRO_VERSION_MIN, PRO_VERSION_MAX,
		 p->protocol_min, p->protocol_max);
	return -1;
}
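
/*
 * Illustrative sketch (not built): the version agreement above is an interval
 * intersection.  Given our [PRO_VERSION_MIN, PRO_VERSION_MAX] and the peer's
 * [min, max], the ranges must overlap, and both sides end up speaking the
 * highest commonly supported version.  MY_MIN/MY_MAX are hypothetical values.
 */
#if 0
#include <stdio.h>

#define MY_MIN 86
#define MY_MAX 101

static int agree_version(int peer_min, int peer_max)
{
	if (peer_max == 0)	/* ancient peers report only a single version */
		peer_max = peer_min;
	if (MY_MAX < peer_min || MY_MIN > peer_max)
		return -1;	/* no overlap: incompatible */
	return MY_MAX < peer_max ? MY_MAX : peer_max;
}

int main(void)
{
	printf("%d\n", agree_version(86, 110));		/* -> 101 */
	printf("%d\n", agree_version(86, 96));		/* -> 96 */
	printf("%d\n", agree_version(110, 120));	/* -> -1 */
	return 0;
}
#endif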

#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN 64

/* Return value:
 *	 1 - auth succeeded,
 *	 0 - failed, try again (network error),
 *	-1 - auth failed, don't try again.
 */

static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer. */

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has unexpected size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif
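
/*
 * Illustrative user-space sketch (not built; uses OpenSSL instead of the
 * kernel crypto API): the verification step of the CRAM-HMAC exchange above.
 * Each side HMACs the *peer's* challenge with the shared secret, which is
 * why a peer presenting our own challenge back to us is rejected outright.
 * verify_response() is a hypothetical name; HMAC()/EVP_sha1() are OpenSSL.
 */
#if 0
#include <string.h>
#include <openssl/evp.h>
#include <openssl/hmac.h>

/* Return 1 if response == HMAC-SHA1(secret, challenge), else 0. */
static int verify_response(const unsigned char *secret, int secret_len,
			   const unsigned char *challenge, size_t challenge_len,
			   const unsigned char *response, unsigned int response_len)
{
	unsigned char expected[EVP_MAX_MD_SIZE];
	unsigned int expected_len;

	if (!HMAC(EVP_sha1(), secret, secret_len,
		  challenge, challenge_len, expected, &expected_len))
		return 0;
	return response_len == expected_len &&
	       memcmp(response, expected, expected_len) == 0;
}
#endif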

int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int * HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
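
/*
 * Unit-conversion sketch (not built): "blksize" above is a byte count.
 * Resync-rate accounting (rs_sect_in) is kept in 512-byte sectors, hence
 * ">> 9"; checksum-equal accounting is kept in bitmap blocks of
 * BM_BLOCK_SIZE (4 KiB in DRBD, i.e. a shift of 12).
 */
#if 0
#include <assert.h>

#define SECTOR_SHIFT	9	/* 512-byte sectors */
#define BM_BLOCK_SHIFT	12	/* 4 KiB bitmap granularity */

int main(void)
{
	int blksize = 32768;	/* one 32 KiB resync block */

	assert((blksize >> SECTOR_SHIFT) == 64);	/* 64 sectors */
	assert((blksize >> BM_BLOCK_SHIFT) == 8);	/* 8 bitmap blocks */
	return 0;
}
#endif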

static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.\n");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}

struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static struct asender_cmd asender_tbl[] = {
	[P_PING]	      = { 0, got_Ping },
	[P_PING_ACK]	      = { 0, got_PingAck },
	[P_RECV_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	      = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	      = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]     = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	      = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	      = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY]   = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]     = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]	      = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	      = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	      = { sizeof(struct p_block_ack), got_BlockAck },
};
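
/*
 * Dispatch sketch (not built): the packet code received on the meta socket
 * indexes asender_tbl directly, so decoding a command is just a bounds check
 * plus a fixed expected length.  A free-standing model with made-up entries:
 */
#if 0
#include <stddef.h>
#include <stdio.h>

struct cmd { size_t pkt_size; const char *name; };

static const struct cmd tbl[] = {
	[0] = { 0,  "ping" },
	[1] = { 24, "block_ack" },
};

/* Expected total packet length for "code", or -1 for an unknown command. */
static long expected_len(unsigned int code, size_t header_size)
{
	if (code >= sizeof(tbl) / sizeof(tbl[0]) || !tbl[code].name)
		return -1;
	return (long)(header_size + tbl[code].pkt_size);
}

int main(void)
{
	printf("%ld\n", expected_len(1, 8));	/* -> 32 */
	printf("%ld\n", expected_len(7, 8));	/* -> -1 */
	return 0;
}
#endif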

int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct net_conf *nc;
	int ping_timeo, tcp_cork, ping_int;
	struct sched_param param = { .sched_priority = 2 };

	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(connection->meta.socket, buf, expect - received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS	 (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;

				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				       jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			/* bounds-check the command code before indexing the table */
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[pi.cmd].fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			cmd = &asender_tbl[pi.cmd];
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}
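
/*
 * Receive-loop sketch (not built): drbd_asender() accumulates into "buf"
 * until "expect" bytes have arrived, first a header, then the command's
 * fixed-size payload.  The same accumulation in plain user-space C, with
 * the rv > 0 / rv == 0 / rv < 0 cases matching the Note above; recv_exact()
 * is a hypothetical name.
 */
#if 0
#include <errno.h>
#include <sys/socket.h>

/* Return 0 once exactly "expect" bytes were read, -1 on error or EOF. */
static int recv_exact(int fd, void *buf, size_t expect)
{
	size_t received = 0;

	while (received < expect) {
		ssize_t rv = recv(fd, (char *)buf + received,
				  expect - received, 0);
		if (rv > 0)
			received += rv;		/* partial read: keep going */
		else if (rv == 0)
			return -1;		/* connection shut down by peer */
		else if (errno == EINTR)
			continue;		/* "woken" by a signal */
		else
			return -1;		/* unexpected error */
	}
	return 0;
}
#endif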