blob: d169b4a7926700187cc643f1ef29a6cd22aeaed3 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070049#include "drbd_vli.h"
50
Lars Ellenberg20c68fd2014-04-28 18:43:25 +020051#define PRO_FEATURES (FF_TRIM)
52
/* Decoded header of one packet received from the peer. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type */
	unsigned int size;	/* payload size in bytes */
	unsigned int vnr;	/* volume number — presumably selects the peer_device; confirm at call sites */
	void *data;		/* payload pointer; assumed to point into the receive buffer — TODO confirm */
};
59
/* Result of drbd_may_finish_epoch() (see prototype below). */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch not finished yet */
	FE_DESTROYED,	/* epoch was destroyed */
	FE_RECYCLED,	/* epoch object was reused */
};
65
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020066static int drbd_do_features(struct drbd_connection *connection);
67static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020068static int drbd_disconnected(struct drbd_peer_device *);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +020069static void conn_wait_active_ee_empty(struct drbd_connection *connection);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020070static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010071static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
Philipp Reisnerb411b362009-09-25 16:07:19 -070073
74#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
75
Lars Ellenberg45bb9122010-05-14 17:10:48 +020076/*
77 * some helper functions to deal with single linked page lists,
78 * page->private being our "next" pointer.
79 */
80
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n pages into the chain.  On success, "page" ends up on the
	 * n-th page and "tmp" on its successor (the new list head). */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
115
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;	/* chain contains at least the first page */
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)	/* optionally report the number of pages in the chain */
		*len = i;
	return page;
}
129
130static int page_chain_free(struct page *page)
131{
132 struct page *tmp;
133 int i = 0;
134 page_chain_for_each_safe(page, tmp) {
135 put_page(page);
136 ++i;
137 }
138 return i;
139}
140
/* Prepend the chain chain_first..chain_last to *head.
 * Locking is the caller's responsibility.  chain_last must be the
 * actual tail of the chain starting at chain_first. */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia check: verify chain_last really is the tail */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
154
/* Try to grab a chain of @number pages, first from the drbd_pp_pool,
 * then (if the pool cannot satisfy the request) page by page from the
 * kernel.  Returns the chain, or NULL if not enough pages were
 * immediately available; partial allocations are given back to the
 * pool, never leaked. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		/* give the partial chain back to the pool */
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
200
/* Move all peer requests from device->net_ee whose pages are no longer
 * in flight onto @to_be_freed.  Caller must hold the req_lock (this
 * walks and modifies device->net_ee). */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
217
/* Collect finished net_ee peer requests under the req_lock, then free
 * them outside the lock (freeing may sleep — see __drbd_free_peer_req). */
static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}
230
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	/* snapshot max_buffers under RCU; fall back to a huge value if the
	 * net_conf is (no longer) there */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	/* fast path: below the throttle limit, try once without waiting */
	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* throttle; if we slept the full timeout without being woken,
		 * stop honoring max_buffers as a limit (see comment above) */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages we hand out */
	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
296
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* pick the accounting counter matching the allocation origin */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* If the pool is already generously filled, really free the pages;
	 * otherwise put the chain back into the pool. */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
325
/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
339
/* Allocate and initialize a peer request (an "EE"), optionally together
 * with a page chain big enough for @data_size bytes of payload.
 * Returns NULL on allocation failure or injected fault. */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	/* round the payload size up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* fault injection hook for testing */
	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	/* the request struct itself must not come from highmem */
	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	memset(peer_req, 0, sizeof(*peer_req));
	INIT_LIST_HEAD(&peer_req->w.list);
	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->submit_jif = jiffies;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
385
/* Free a peer request and its page chain; @is_net selects which
 * accounting counter the pages are subtracted from (see drbd_free_pages).
 * May sleep, so must not be called under a spinlock. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	/* normally the AL reference was dropped before we get here;
	 * if not, complain (via expect) and drop it now */
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}
401
/* Splice @list empty under the req_lock, then free every peer request on
 * it outside the lock.  Returns the number of requests freed. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* net_ee entries are accounted in pp_in_use_by_net */
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
419
/*
 * Run the completion callbacks of all peer requests on done_ee, and
 * reclaim finished net_ee entries along the way.
 * Returns 0, or the first non-zero callback result.
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	/* detach both lists under the req_lock, process outside of it */
	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)	/* remember only the first error */
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
455
/* Wait until @head is empty.  Caller must hold the req_lock; it is
 * dropped around the actual sleep and re-taken before returning, so the
 * lock is held again on exit. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
471
/* Locked wrapper around _drbd_wait_ee_list_empty(): takes the req_lock,
 * waits until @head is empty, releases the lock again. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
479
/* Receive up to @size bytes from @sock into @buf.
 * With @flags == 0, defaults to MSG_WAITALL | MSG_NOSIGNAL.
 * Returns the kernel_recvmsg() result: number of bytes received,
 * 0 on orderly shutdown, or a negative error code. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}
491
/* Receive @size bytes on the data socket.  Logs unexpected errors and
 * shutdowns; on a short result, pushes the connection to C_BROKEN_PIPE
 * (unless we sent the disconnect ourselves and the state change is
 * already underway).  Returns the raw receive result. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		/* EOF: if we initiated the disconnect, give the state machine
		 * a chance to leave C_WF_REPORT_PARAMS before complaining */
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
524
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200525static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100526{
527 int err;
528
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200529 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100530 if (err != size) {
531 if (err >= 0)
532 err = -EIO;
533 } else
534 err = 0;
535 return err;
536}
537
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200538static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100539{
540 int err;
541
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200542 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100543 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200544 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100545 return err;
546}
547
/* quoting tcp(7):
 * On individual connections, the socket buffer size must be set prior to the
 * listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 * A size of 0 leaves the respective buffer at its kernel default.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
566
/* Actively try to establish an outgoing TCP connection to the peer,
 * bound to our configured source address.
 * Returns the connected socket, or NULL on failure.  "Expected"
 * connect failures (timeout, refused, unreachable, ...) are silent and
 * leave the connection state alone; unexpected errors are logged and
 * push the connection to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the tunables from net_conf under RCU */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* note: sizeof(src_in6) == sizeof(peer_in6), both sockaddr_in6 */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
654
/* Context handed to the listen socket's sk_state_change callback so that
 * an incoming connection can wake up the receiver thread waiting in
 * drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;	/* the listening socket we hooked */
	struct completion door_bell;	/* completed when a peer connects */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored on teardown */

};
662
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200663static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700664{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200665 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200666 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200667
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200668 state_change = ad->original_sk_state_change;
669 if (sk->sk_state == TCP_ESTABLISHED)
670 complete(&ad->door_bell);
671 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200672}
673
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200674static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700675{
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200676 int err, sndbuf_size, rcvbuf_size, my_addr_len;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200677 struct sockaddr_in6 my_addr;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200678 struct socket *s_listen;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200679 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700680 const char *what;
681
Philipp Reisner44ed1672011-04-19 17:10:19 +0200682 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200683 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200684 if (!nc) {
685 rcu_read_unlock();
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200686 return -EIO;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200687 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200688 sndbuf_size = nc->sndbuf_size;
689 rcvbuf_size = nc->rcvbuf_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200690 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700691
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200692 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
693 memcpy(&my_addr, &connection->my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700694
695 what = "sock_create_kern";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200696 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200697 SOCK_STREAM, IPPROTO_TCP, &s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700698 if (err) {
699 s_listen = NULL;
700 goto out;
701 }
702
Philipp Reisner98683652012-11-09 14:18:43 +0100703 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200704 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700705
706 what = "bind before listen";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200707 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700708 if (err < 0)
709 goto out;
710
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200711 ad->s_listen = s_listen;
712 write_lock_bh(&s_listen->sk->sk_callback_lock);
713 ad->original_sk_state_change = s_listen->sk->sk_state_change;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200714 s_listen->sk->sk_state_change = drbd_incoming_connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200715 s_listen->sk->sk_user_data = ad;
716 write_unlock_bh(&s_listen->sk->sk_callback_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700717
Philipp Reisner2820fd32012-07-12 10:22:48 +0200718 what = "listen";
719 err = s_listen->ops->listen(s_listen, 5);
720 if (err < 0)
721 goto out;
722
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200723 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700724out:
725 if (s_listen)
726 sock_release(s_listen);
727 if (err < 0) {
728 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200729 drbd_err(connection, "%s failed, err = %d\n", what, err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200730 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700731 }
732 }
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200733
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200734 return -EIO;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200735}
736
/* Undo the callback redirection installed by prepare_listen_socket():
 * restore the saved sk_state_change and drop the sk_user_data reference
 * to our on-stack accept_wait_data, under the callback lock. */
static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}
744
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200745static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200746{
747 int timeo, connect_int, err = 0;
748 struct socket *s_estab = NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200749 struct net_conf *nc;
750
751 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200752 nc = rcu_dereference(connection->net_conf);
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200753 if (!nc) {
754 rcu_read_unlock();
755 return NULL;
756 }
757 connect_int = nc->connect_int;
758 rcu_read_unlock();
759
760 timeo = connect_int * HZ;
Akinobu Mita38b682b22013-04-29 16:21:31 -0700761 /* 28.5% random jitter */
762 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200763
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200764 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
765 if (err <= 0)
766 return NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200767
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200768 err = kernel_accept(ad->s_listen, &s_estab, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700769 if (err < 0) {
770 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200771 drbd_err(connection, "accept failed, err = %d\n", err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200772 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700773 }
774 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700775
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200776 if (s_estab)
777 unregister_state_change(s_estab->sk, ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700778
779 return s_estab;
780}
781
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700783
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200784static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200785 enum drbd_packet cmd)
786{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200787 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200788 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200789 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700790}
791
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200792static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700793{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200794 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200795 struct packet_info pi;
Philipp Reisner4920e372014-03-18 14:40:13 +0100796 struct net_conf *nc;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200797 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700798
Philipp Reisner4920e372014-03-18 14:40:13 +0100799 rcu_read_lock();
800 nc = rcu_dereference(connection->net_conf);
801 if (!nc) {
802 rcu_read_unlock();
803 return -EIO;
804 }
805 sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
806 rcu_read_unlock();
807
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200808 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200809 if (err != header_size) {
810 if (err >= 0)
811 err = -EIO;
812 return err;
813 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200814 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200815 if (err)
816 return err;
817 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700818}
819
820/**
821 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700822 * @sock: pointer to the pointer to the socket.
823 */
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100824static bool drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700825{
826 int rr;
827 char tb[4];
828
829 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100830 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700831
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100832 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700833
834 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100835 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700836 } else {
837 sock_release(*sock);
838 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100839 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700840 }
841}
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100842
843static bool connection_established(struct drbd_connection *connection,
844 struct socket **sock1,
845 struct socket **sock2)
846{
847 struct net_conf *nc;
848 int timeout;
849 bool ok;
850
851 if (!*sock1 || !*sock2)
852 return false;
853
854 rcu_read_lock();
855 nc = rcu_dereference(connection->net_conf);
856 timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
857 rcu_read_unlock();
858 schedule_timeout_interruptible(timeout);
859
860 ok = drbd_socket_okay(sock1);
861 ok = drbd_socket_okay(sock2) && ok;
862
863 return ok;
864}
865
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.
   Sends the initial per-device handshake (sync params, sizes, uuids,
   current state) and (re)arms the request timer.  Returns 0 or the first
   send error. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* restart the per-connection packet sequence numbering */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* protocol >= 100 serializes state changes per device; older
	 * protocols serialize them per connection */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* initial handshake packets, in order; stop at the first error */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700893
/*
 * conn_connect() - establish both DRBD sockets (data + meta) to the peer
 * and perform the feature/auth/protocol handshake.
 *
 * Both nodes try to connect() to each other while also listen()ing, so
 * the two connections may be initiated by either side; P_INITIAL_DATA /
 * P_INITIAL_META first packets tell the sockets apart, and crossed
 * connections are resolved with a coin flip (see "randomize").
 *
 * return values:
 *	1 yes, we have a valid connection
 *	0 oops, did not work out, please try again
 *     -1 peer talks different language,
 *	  no point in trying again, please go standalone.
 *     -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h;
	bool discard_my_data, ok;
	enum drbd_state_rv rv;
	/* on-stack context for the listen-socket door bell */
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* local bookkeeping for the two sockets; published into
	 * connection->data / connection->meta only once both are up */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		/* active side: try to connect() out to the peer */
		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				/* we initiated both: we win conflict resolution */
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (connection_established(connection, &sock.socket, &msock.socket))
			break;

retry:
		/* passive side: accept a connection initiated by the peer */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			/* re-check: our earlier outgoing sockets may have died */
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once: keep the
					 * accepted one, coin-flip whether to retry */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				/* peer initiated: peer wins conflict resolution */
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = connection_established(connection, &sock.socket, &msock.socket);
	} while (!ok);

	/* both sockets are up; the listen socket is no longer needed */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	/* GFP_NOIO: socket allocations must not recurse into block I/O */
	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	/* publish the established sockets on the connection */
	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* negotiate the protocol version (P_CONNECTION_FEATURES) */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	/* handshake done: switch to the configured operational timeouts */
	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* per-device handshake; drop the RCU lock around the (sleeping)
	 * drbd_connected() call, pinning the device with a kref instead */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1122
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001123static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001124{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001125 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001126
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001127 if (header_size == sizeof(struct p_header100) &&
1128 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1129 struct p_header100 *h = header;
1130 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001131 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001132 return -EINVAL;
1133 }
1134 pi->vnr = be16_to_cpu(h->volume);
1135 pi->cmd = be16_to_cpu(h->command);
1136 pi->size = be32_to_cpu(h->length);
1137 } else if (header_size == sizeof(struct p_header95) &&
1138 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001139 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001140 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001141 pi->size = be32_to_cpu(h->length);
1142 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001143 } else if (header_size == sizeof(struct p_header80) &&
1144 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1145 struct p_header80 *h = header;
1146 pi->cmd = be16_to_cpu(h->command);
1147 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001148 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001149 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001150 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001151 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001152 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001153 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001154 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001155 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001156 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001157}
1158
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001159static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001160{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001161 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001162 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001163
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001164 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001165 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001166 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001167
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001168 err = decode_header(connection, buffer, pi);
1169 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001171 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001172}
1173
/* Issue a flush to the backing device of every attached volume on this
 * connection, so that all writes of the current epoch reach stable storage.
 * Only acts when the resource's write ordering is WO_bdev_flush.  If any
 * flush fails, falls back to the weaker WO_drain_io ordering and stops. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			/* Take a kref so the device stays valid while we drop
			 * the RCU read lock for the (sleeping) flush below. */
			kref_get(&device->kref);
			rcu_read_unlock();

			/* Right now, we have only this one synchronous code path
			 * for flushes between request epochs.
			 * We may want to make those asynchronous,
			 * or at least parallelize the flushes to the volume devices.
			 */
			device->flush_jif = jiffies;
			set_bit(FLUSH_PENDING, &device->flags);
			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			clear_bit(FLUSH_PENDING, &device->flags);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			/* re-acquire the RCU read lock before continuing
			 * (or before the final unlock below) */
			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1217
/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection:	DRBD connection the epoch belongs to.
 * @epoch:	Epoch object.
 * @ev:	Epoch event.
 *
 * Returns FE_STILL_LIVE if the epoch stays open, FE_DESTROYED if it was
 * removed and freed, or FE_RECYCLED if the current epoch was reset for reuse.
 * Finishing one epoch may cascade to its successors in the epoch list.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		/* apply the event itself; EV_CLEANUP is a modifier flag */
		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		/* An epoch is finished once it saw at least one write, all of
		 * its writes completed, and its barrier number is known
		 * (or we are cleaning up anyway). */
		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				/* drop the lock while sending on the network */
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				/* older epoch: unlink and free it, then let the
				 * loop re-examine its successor */
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				/* current epoch: just reset it for reuse */
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}
1293
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001294static enum write_ordering_e
1295max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1296{
1297 struct disk_conf *dc;
1298
1299 dc = rcu_dereference(bdev->disk_conf);
1300
1301 if (wo == WO_bdev_flush && !dc->disk_flushes)
1302 wo = WO_drain_io;
1303 if (wo == WO_drain_io && !dc->disk_drain)
1304 wo = WO_none;
1305
1306 return wo;
1307}
1308
/**
 * drbd_bump_write_ordering() - Fall back to an other write ordering method
 * @resource:	DRBD resource.
 * @bdev:	backing device currently being attached (its limits are applied
 *		even if it is not yet reachable via the devices idr), or NULL.
 * @wo:	Write ordering method to try.
 *
 * Clamps @wo against the previous method and against what every attached
 * backing device allows, then makes it the resource's write ordering.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	/* only an explicit WO_bdev_flush request may raise the method again;
	 * everything else can only lower it */
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			/* @bdev already covered by the loop; don't apply twice */
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	/* a device being attached right now is not in the idr yet */
	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}
1348
/**
 * drbd_submit_peer_request()
 * @device:	DRBD device.
 * @peer_req:	peer request
 * @rw:	flag field, see bio->bi_rw
 * @fault_type:	fault injection site, passed through to
 *		drbd_generic_make_request()
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;	/* singly linked chain of allocated bios */
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned data_size = peer_req->i.size;
	unsigned n_bios = 0;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	/* Peer sent a trim, but we were configured to emulate it by writing
	 * zeroes instead; handled synchronously here, no bios needed. */
	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->active_ee);
		spin_unlock_irq(&device->resource->req_lock);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, data_size >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	/* prepend to the chain; submission order is reversed below */
	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		bio->bi_iter.bi_size = data_size;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, data_size, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			/* current bio is full; start another one for the
			 * remaining pages */
			goto next_bio;
		}
		data_size -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, data_size == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	/* none of the chained bios were submitted yet; just drop them */
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}
1478
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001479static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001480 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001481{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001482 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001483
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001484 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001485 drbd_clear_interval(i);
1486
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001487 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001488 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001489 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001490}
1491
/* Wait until the active_ee list of every volume on this connection is
 * empty, i.e. all currently submitted peer write requests have completed.
 * The RCU read lock is dropped around the (sleeping) wait; the kref keeps
 * the device alive meanwhile. */
static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}
1509
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001510static struct drbd_peer_device *
1511conn_peer_device(struct drbd_connection *connection, int volume_number)
1512{
1513 return idr_find(&connection->peer_devices, volume_number);
1514}
1515
/* Handle an incoming P_BARRIER packet: record the barrier number on the
 * current epoch, possibly finish that epoch, and open a fresh one. */
static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through: on allocation failure, degrade to the
		 * drain/flush behavior below */

	case WO_bdev_flush:
	case WO_drain_io:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		/* only need a new epoch object if the current one is not
		 * empty (i.e. was not recycled in place) */
		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	/* initialize the freshly allocated epoch */
	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}
1583
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 *
 * Allocates a peer request for @data_size bytes at @sector and receives the
 * payload (and optional integrity digest) from the peer into its page chain.
 * For P_TRIM packets no payload is received; the size comes from the trim
 * sub-header instead.  Returns NULL on any receive, validation, digest, or
 * allocation failure. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int digest_size, err;
	unsigned int data_size = pi->size, ds;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	/* if data integrity checking is enabled, the digest precedes the
	 * payload on the wire */
	digest_size = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return NULL;
		data_size -= digest_size;
	}

	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	/* sanity-check the size the peer announced */
	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	/* trims carry no payload: nothing more to receive */
	if (trim)
		return peer_req;

	/* receive the payload, page by page, into the peer request */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* verify the received payload against the digest, if enabled */
	if (digest_size) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size >> 9;
	return peer_req;
}
1676
1677/* drbd_drain_block() just takes a data block
1678 * out of the socket input buffer, and discards it.
1679 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001680static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001681{
1682 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001683 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684 void *data;
1685
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001686 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001687 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001688
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001689 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690
1691 data = kmap(page);
1692 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001693 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1694
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001695 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001696 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001697 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001698 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699 }
1700 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001701 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001702 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703}
1704
/* Receive the payload for a read request directly into the pages of the
 * request's master bio, verifying the optional integrity digest.
 * Returns 0 on success or a negative error code. */
static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int digest_size, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	/* if data integrity checking is enabled, the digest precedes the
	 * payload on the wire */
	digest_size = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		digest_size = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, digest_size);
		if (err)
			return err;
		data_size -= digest_size;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	/* copy the payload segment by segment into the bio's pages */
	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	/* verify the received payload against the digest, if enabled */
	if (digest_size) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, digest_size)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}
1752
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001753/*
1754 * e_end_resync_block() is called in asender context via
1755 * drbd_finish_peer_reqs().
1756 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001757static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001759 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001760 container_of(w, struct drbd_peer_request, w);
1761 struct drbd_peer_device *peer_device = peer_req->peer_device;
1762 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001763 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001764 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001765
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001766 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001767
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001768 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001769 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001770 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001771 } else {
1772 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001773 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001774
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001775 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001777 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001779 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780}
1781
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001782static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001783 struct packet_info *pi) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001784{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001785 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001786 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001787
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001788 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001789 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001790 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001791
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001792 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001794 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 /* corresponding dec_unacked() in e_end_resync_block()
1796 * respective _drbd_clear_done_ee */
1797
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001798 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02001799 peer_req->submit_jif = jiffies;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001800
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001801 spin_lock_irq(&device->resource->req_lock);
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02001802 list_add_tail(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001803 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001805 atomic_add(pi->size >> 9, &device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001806 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001807 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001809 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001810 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001811 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001812 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001813 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001814
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001815 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001816fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001817 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001818 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001819}
1820
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001821static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001822find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001823 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001824{
1825 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001826
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001827 /* Request object according to our peer */
1828 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001829 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001830 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001831 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001832 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001833 (unsigned long)id, (unsigned long long)sector);
1834 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001835 return NULL;
1836}
1837
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001838static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001839{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001840 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001841 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001842 struct drbd_request *req;
1843 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001844 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001845 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001846
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001847 peer_device = conn_peer_device(connection, pi->vnr);
1848 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001849 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001850 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851
1852 sector = be64_to_cpu(p->sector);
1853
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001854 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001855 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001856 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001857 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001858 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001859
Bart Van Assche24c48302011-05-21 18:32:29 +02001860 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001861 * special casing it there for the various failure cases.
1862 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001863 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001864 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001865 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001866 /* else: nothing. handled from drbd_disconnect...
1867 * I don't think we may complete this just yet
1868 * in case we are "on-disconnect: freeze" */
1869
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001870 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871}
1872
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001873static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001874{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001875 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001876 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001877 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001878 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001879 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001880
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001881 peer_device = conn_peer_device(connection, pi->vnr);
1882 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001883 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001884 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001885
1886 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001887 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001889 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001890 /* data is submitted to disk within recv_resync_read.
1891 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001892 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001893 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001894 } else {
1895 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001896 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001897
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001898 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001899
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001900 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001901 }
1902
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001903 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001904
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001905 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906}
1907
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001908static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001909 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001910{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001911 struct drbd_interval *i;
1912 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001913
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001914 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001915 if (!i->local)
1916 continue;
1917 req = container_of(i, struct drbd_request, i);
1918 if (req->rq_state & RQ_LOCAL_PENDING ||
1919 !(req->rq_state & RQ_POSTPONED))
1920 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001921 /* as it is RQ_POSTPONED, this will cause it to
1922 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001923 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001924 }
1925}
1926
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001927/*
1928 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001930static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001931{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001932 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001933 container_of(w, struct drbd_peer_request, w);
1934 struct drbd_peer_device *peer_device = peer_req->peer_device;
1935 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001936 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001937 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001938
Philipp Reisner303d1442011-04-13 16:24:47 -07001939 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001940 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001941 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1942 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001943 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001944 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001945 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001946 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001947 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001948 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001949 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001950 /* we expect it to be marked out of sync anyways...
1951 * maybe assert this? */
1952 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001953 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001954 }
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01001955
Philipp Reisnerb411b362009-09-25 16:07:19 -07001956 /* we delete from the conflict detection hash _after_ we sent out the
1957 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001958 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001959 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001960 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001961 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001962 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001963 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001964 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001965 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001966 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001967
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001968 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001969
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001970 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971}
1972
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001973static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001975 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001976 container_of(w, struct drbd_peer_request, w);
1977 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001978 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001979
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001980 err = drbd_send_ack(peer_device, ack, peer_req);
1981 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001983 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001984}
1985
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001986static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001987{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001988 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001989}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001990
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001991static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001992{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001993 struct drbd_peer_request *peer_req =
1994 container_of(w, struct drbd_peer_request, w);
1995 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001996
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001997 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001998 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001999}
2000
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002001static bool seq_greater(u32 a, u32 b)
2002{
2003 /*
2004 * We assume 32-bit wrap-around here.
2005 * For 24-bit wrap-around, we would have to shift:
2006 * a <<= 8; b <<= 8;
2007 */
2008 return (s32)a - (s32)b > 0;
2009}
2010
2011static u32 seq_max(u32 a, u32 b)
2012{
2013 return seq_greater(a, b) ? a : b;
2014}
2015
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002016static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002017{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002018 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002019 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002020
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002021 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002022 spin_lock(&device->peer_seq_lock);
2023 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2024 device->peer_seq = newest_peer_seq;
2025 spin_unlock(&device->peer_seq_lock);
2026 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002027 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002028 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002029 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002030}
2031
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002032static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2033{
2034 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2035}
2036
2037/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002038static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002039{
2040 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002041 bool rv = 0;
2042
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002043 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002044 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002045 if (overlaps(peer_req->i.sector, peer_req->i.size,
2046 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002047 rv = 1;
2048 break;
2049 }
2050 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002051 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002052
2053 return rv;
2054}
2055
Philipp Reisnerb411b362009-09-25 16:07:19 -07002056/* Called from receive_Data.
2057 * Synchronize packets on sock with packets on msock.
2058 *
2059 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2060 * packet traveling on msock, they are still processed in the order they have
2061 * been sent.
2062 *
2063 * Note: we don't care for Ack packets overtaking P_DATA packets.
2064 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002065 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002066 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002067 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002068 * ourselves. Correctly handles 32bit wrap around.
2069 *
2070 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2071 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2072 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2073 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2074 *
2075 * returns 0 if we may process the packet,
2076 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002077static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002079 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002080 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002081 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002082 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002083
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002084 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002085 return 0;
2086
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002087 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002088 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002089 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2090 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002092 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002093
Philipp Reisnerb411b362009-09-25 16:07:19 -07002094 if (signal_pending(current)) {
2095 ret = -ERESTARTSYS;
2096 break;
2097 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002098
2099 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002100 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002101 rcu_read_unlock();
2102
2103 if (!tp)
2104 break;
2105
2106 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002107 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2108 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002109 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002110 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002111 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002112 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002113 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002114 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002115 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002116 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002117 break;
2118 }
2119 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002120 spin_unlock(&device->peer_seq_lock);
2121 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002122 return ret;
2123}
2124
Lars Ellenberg688593c2010-11-17 22:25:03 +01002125/* see also bio_flags_to_wire()
2126 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2127 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002128static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002129{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002130 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2131 (dpf & DP_FUA ? REQ_FUA : 0) |
2132 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2133 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002134}
2135
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002136static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002137 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002139 struct drbd_interval *i;
2140
2141 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002142 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002143 struct drbd_request *req;
2144 struct bio_and_error m;
2145
2146 if (!i->local)
2147 continue;
2148 req = container_of(i, struct drbd_request, i);
2149 if (!(req->rq_state & RQ_POSTPONED))
2150 continue;
2151 req->rq_state &= ~RQ_POSTPONED;
2152 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002153 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002154 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002155 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002156 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002157 goto repeat;
2158 }
2159}
2160
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002161static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002162 struct drbd_peer_request *peer_req)
2163{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002164 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002165 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002166 sector_t sector = peer_req->i.sector;
2167 const unsigned int size = peer_req->i.size;
2168 struct drbd_interval *i;
2169 bool equal;
2170 int err;
2171
2172 /*
2173 * Inserting the peer request into the write_requests tree will prevent
2174 * new conflicting local requests from being added.
2175 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002176 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002177
2178 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002179 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002180 if (i == &peer_req->i)
2181 continue;
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002182 if (i->completed)
2183 continue;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002184
2185 if (!i->local) {
2186 /*
2187 * Our peer has sent a conflicting remote request; this
2188 * should not happen in a two-node setup. Wait for the
2189 * earlier peer request to complete.
2190 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002191 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002192 if (err)
2193 goto out;
2194 goto repeat;
2195 }
2196
2197 equal = i->sector == sector && i->size == size;
2198 if (resolve_conflicts) {
2199 /*
2200 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002201 * overlapping request, it can be considered overwritten
2202 * and thus superseded; otherwise, it will be retried
2203 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002204 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002205 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002206 (i->size >> 9) >= sector + (size >> 9);
2207
2208 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002209 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002210 "local=%llus +%u, remote=%llus +%u, "
2211 "assuming %s came first\n",
2212 (unsigned long long)i->sector, i->size,
2213 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002214 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002215
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002216 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002217 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002218 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002219 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002220
2221 err = -ENOENT;
2222 goto out;
2223 } else {
2224 struct drbd_request *req =
2225 container_of(i, struct drbd_request, i);
2226
2227 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002228 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002229 "local=%llus +%u, remote=%llus +%u\n",
2230 (unsigned long long)i->sector, i->size,
2231 (unsigned long long)sector, size);
2232
2233 if (req->rq_state & RQ_LOCAL_PENDING ||
2234 !(req->rq_state & RQ_POSTPONED)) {
2235 /*
2236 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002237 * decide if this request has been superseded
2238 * or needs to be retried.
2239 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002240 * disappear from the write_requests tree.
2241 *
2242 * In addition, wait for the conflicting
2243 * request to finish locally before submitting
2244 * the conflicting peer request.
2245 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002246 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002247 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002248 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002249 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002250 goto out;
2251 }
2252 goto repeat;
2253 }
2254 /*
2255 * Remember to restart the conflicting requests after
2256 * the new peer request has completed.
2257 */
2258 peer_req->flags |= EE_RESTART_REQUESTS;
2259 }
2260 }
2261 err = 0;
2262
2263 out:
2264 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002265 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002266 return err;
2267}
2268
/* mirrored write */
/*
 * receive_Data() - receive and submit a mirrored write (P_DATA / P_TRIM)
 * @connection:	connection the packet arrived on
 * @pi:		packet meta data; pi->data points at the p_data header
 *
 * Reads the write payload off the socket into a peer request, accounts the
 * request in the current write epoch, arranges the ack the wire protocol
 * demands, resolves concurrent-write conflicts in two-primaries setups, and
 * submits the request to the local backing device.
 *
 * Returns 0 on success, a negative error code otherwise.  On success the
 * ldev reference taken here is released from drbd_peer_request_endio (see
 * the comment below); on the error paths it is dropped locally.
 */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct net_conf *nc;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		/* No local disk: still keep the peer sequence numbers and the
		 * epoch size consistent, negative-ack the write, and drain its
		 * payload so the data stream stays in sync. */
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_APPLICATION;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(dp_flags);
	if (pi->cmd == P_TRIM) {
		/* Discard request: no payload pages.  If the backing device
		 * cannot discard, fall back to explicit zero-out. */
		struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
		peer_req->flags |= EE_IS_TRIM;
		if (!blk_queue_discard(q))
			peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
		D_ASSERT(peer_device, peer_req->i.size > 0);
		D_ASSERT(peer_device, rw & REQ_DISCARD);
		D_ASSERT(peer_device, peer_req->pages == NULL);
	} else if (peer_req->pages == NULL) {
		/* Zero-sized write: only valid as an explicit flush request. */
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	/* Account this write in the currently open epoch (barrier handling). */
	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	tp = nc->two_primaries;
	if (peer_device->connection->agreed_pro_version < 100) {
		/* Before protocol 100 the ack policy is implied by the
		 * configured wire protocol instead of carried in dp_flags. */
		switch (nc->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
	}
	rcu_read_unlock();

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block()
		 * respective _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
	}

	if (tp) {
		/* two primaries implies protocol C */
		/* With two primaries, this write may conflict with local
		 * application writes; resolve via the interval tree, under
		 * req_lock. */
		D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				/* -ENOENT: request was superseded or queued for
				 * retry by handle_write_conflicts(); nothing
				 * more to do here. */
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	/* if we use the zeroout fallback code, we process synchronously
	 * and we wait for all pending requests, respectively wait for
	 * active_ee to become empty in drbd_submit_peer_request();
	 * better not add ourselves here. */
	if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
		list_add_tail(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* As sync target, do not submit while a resync write overlapping this
	 * interval is in flight (see overlapping_resync_write()). */
	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
	}

	err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	/* Undo the bookkeeping done above: unlink from active_ee and the
	 * interval tree, and release the activity-log extent if we took it. */
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
2430
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */
/*
 * @sector: sector of the resync request under consideration
 * @throttle_if_app_is_waiting: when true, throttle even if application IO
 *	already waits for this resync extent (caller's choice)
 *
 * Returns true if the caller should slow down resync for this request.
 */
bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
		bool throttle_if_app_is_waiting)
{
	struct lc_element *tmp;
	bool throttle = drbd_rs_c_min_rate_throttle(device);

	/* Fast path: rate says "don't throttle", or the caller wants to
	 * throttle regardless of waiting application IO. */
	if (!throttle || throttle_if_app_is_waiting)
		return throttle;

	/* Check the resync LRU for this extent under al_lock: a BME_PRIORITY
	 * extent means application IO is blocked on it. */
	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags))
			throttle = false;
		/* Do not slow down if app IO is already waiting for this extent,
		 * and our progress is necessary for application IO to complete. */
	}
	spin_unlock_irq(&device->al_lock);

	return throttle;
}
2464
/*
 * drbd_rs_c_min_rate_throttle() - is the backing device busy and resync
 * running faster than the configured c-min-rate?
 *
 * Samples the backing disk's sector counters and compares unaccounted
 * activity (IO not caused by our own resync, tracked in rs_sect_ev) as well
 * as the short-term resync rate against c_min_rate.
 *
 * Returns true if resync should be throttled.  Note: updates
 * device->rs_last_events as a side effect of taking a new sample.
 */
bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	unsigned int c_min_rate;
	int curr_events;

	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return false;

	/* Sectors read+written on the backing disk, minus what our own resync
	 * submitted: what remains is "foreign" (application) activity. */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&device->rs_sect_ev);

	/* Only bother computing the rate if application IO is pending in the
	 * activity log, or we saw significant (>64 sectors) foreign IO. */
	if (atomic_read(&device->ap_actlog_cnt)
	    || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;	/* avoid division by zero on the first tick */
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);	/* recent resync rate in KB/s */

		if (dbdt > c_min_rate)
			return true;
	}
	return false;
}
2511
/*
 * receive_DataRequest() - serve a peer's read-type request
 * @connection:	connection the packet arrived on
 * @pi:		packet meta data; pi->data points at the p_block_req header
 *
 * Handles P_DATA_REQUEST (application read), P_RS_DATA_REQUEST (resync
 * read), P_CSUM_RS_REQUEST / P_OV_REPLY (checksum-based resync / online
 * verify, both carry a digest payload) and P_OV_REQUEST (online verify).
 * Validates sector/size, allocates a peer request, reads any digest payload,
 * optionally throttles resync, and submits the local read.
 *
 * Returns 0 on success, negative error code otherwise.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* Sanity-check the request: positive, 512-byte aligned, bounded, and
	 * within the device capacity. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		/* No up-to-date local data: negative-ack per request type,
		 * then drain any payload to keep the stream in sync. */
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Pick the completion callback and fault-injection type per request;
	 * some cases jump straight to submit / submit_for_resync below. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		peer_req->flags |= EE_APPLICATION;
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* Both carry a digest payload; receive it into a digest_info
		 * allocated right behind the struct. */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			/* First verify request: initialize online-verify
			 * progress tracking and the sync marks. */
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */

	/* Even though this may be a resync request, we do add to "read_ee";
	 * "sync_ee" is only used for resync WRITEs.
	 * Add to list early, so debugfs can find this request
	 * even if we have to sleep below. */
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	update_receiver_timing_details(connection, drbd_rs_should_slow_down);
	if (device->state.peer != R_PRIMARY
	    && drbd_rs_should_slow_down(device, sector, false))
		schedule_timeout_uninterruptible(HZ/10);
	update_receiver_timing_details(connection, drbd_rs_begin_io);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	/* account the sectors we are about to read for resync rate control */
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	update_receiver_timing_details(connection, drbd_submit_peer_request);
	inc_unacked(device);
	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");

out_free_e:
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2715
/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy to pick a split-brain victim.
 * Returns -1, 1 or -100 ("no decision").  NOTE(review): by the callers'
 * convention -1/1 appear to select which node's data survives (cf. the
 * "assuming %s came first" local/remote convention elsewhere in this file) —
 * confirm against the drbd_asb_recover_* call sites.
 *
 * Several cases below fall through deliberately: DISCARD_YOUNGER_PRI falls
 * into DISCARD_OLDER_PRI when both or neither node was primary, which in
 * turn falls into DISCARD_ZERO_CHG / DISCARD_LEAST_CHG as documented by the
 * warning message.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* UI_BITMAP bit 0 encodes "was primary" for self and peer. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* Number of changed blocks on each side, for the *_CHG policies. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies are not valid with zero remaining primaries */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
			  "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: the RESOLVE_CONFLICTS side breaks it */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* fallthrough (only when we arrived here from the cases above) */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
		     /* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
2796
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002797/**
2798 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2799 */
2800static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002801{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002802 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002803 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002804 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805
Philipp Reisner44ed1672011-04-19 17:10:19 +02002806 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002807 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002808 rcu_read_unlock();
2809 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810 case ASB_DISCARD_YOUNGER_PRI:
2811 case ASB_DISCARD_OLDER_PRI:
2812 case ASB_DISCARD_LEAST_CHG:
2813 case ASB_DISCARD_LOCAL:
2814 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002815 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002816 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002817 break;
2818 case ASB_DISCONNECT:
2819 break;
2820 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002821 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002822 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002824 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002825 rv = hg;
2826 break;
2827 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002828 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002829 break;
2830 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002831 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002832 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002833 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002834 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002835 enum drbd_state_rv rv2;
2836
Philipp Reisnerb411b362009-09-25 16:07:19 -07002837 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2838 * we might be here in C_WF_REPORT_PARAMS which is transient.
2839 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002840 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002841 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002842 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002844 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002845 rv = hg;
2846 }
2847 } else
2848 rv = hg;
2849 }
2850
2851 return rv;
2852}
2853
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002854/**
2855 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2856 */
2857static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002858{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002859 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002860 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002861 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002862
Philipp Reisner44ed1672011-04-19 17:10:19 +02002863 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002864 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002865 rcu_read_unlock();
2866 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002867 case ASB_DISCARD_YOUNGER_PRI:
2868 case ASB_DISCARD_OLDER_PRI:
2869 case ASB_DISCARD_LEAST_CHG:
2870 case ASB_DISCARD_LOCAL:
2871 case ASB_DISCARD_REMOTE:
2872 case ASB_CONSENSUS:
2873 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002874 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002875 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002876 break;
2877 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002878 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002879 break;
2880 case ASB_DISCONNECT:
2881 break;
2882 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002883 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002885 enum drbd_state_rv rv2;
2886
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2888 * we might be here in C_WF_REPORT_PARAMS which is transient.
2889 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002890 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002891 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002892 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002893 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002894 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895 rv = hg;
2896 }
2897 } else
2898 rv = hg;
2899 }
2900
2901 return rv;
2902}
2903
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002904static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002905 u64 bits, u64 flags)
2906{
2907 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002908 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002909 return;
2910 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002911 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002912 text,
2913 (unsigned long long)uuid[UI_CURRENT],
2914 (unsigned long long)uuid[UI_BITMAP],
2915 (unsigned long long)uuid[UI_HISTORY_START],
2916 (unsigned long long)uuid[UI_HISTORY_END],
2917 (unsigned long long)bits,
2918 (unsigned long long)flags);
2919}
2920
/*
  100	after split brain try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091   requires proto 91
-1096   requires proto 96
 */
/* Compare our on-disk UUID set (device->ldev->md.uuid) against the peer's
 * (device->p_uuid) and return one of the values in the legend above;
 * *rule_nr reports which rule decided, for logging by the caller.
 * Rules 34/35/51/71 additionally rewrite UUID history in place to repair
 * the effects of a lost P_SYNC_UUID packet.  The caller holds
 * md.uuid_lock (see drbd_sync_handshake()) so the sets are stable. */
static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	/* Mask bit 0 everywhere: it carries role information, not identity. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Both sides freshly created: nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Only we are fresh: full sync from the peer. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Only the peer is fresh: full sync from us. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		/* Identical current UUIDs: either a missed resync-finished
		 * event, or a common crash; decide by crash-time roles. */
		int rct, dc; /* roles at crash time */

		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			/* If our bitmap/history UUIDs line up with the peer's
			 * history, we were SyncSource and missed the resync
			 * finished event: roll our own history forward. */
			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			/* Mirror image of rule 34/36: we were SyncTarget and
			 * the peer missed the resync finished event; correct
			 * our copy of the peer's UUIDs. */
			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary: break the tie via the
			 * RESOLVE_CONFLICTS flag so the two nodes decide
			 * in opposite directions. */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Our current UUID matches the peer's bitmap UUID: the peer is
	 * ahead of us, sync from the peer using its bitmap. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Our current UUID only appears in the peer's history: the peer has
	 * moved on without us; full sync from the peer. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Mirror of rule 50: our bitmap UUID matches the peer's current
	 * UUID, so we are ahead; sync from us using our bitmap. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* Mirror of rule 60: the peer's current UUID is in our history. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Matching non-zero bitmap UUIDs on both sides: split brain,
	 * attempt automatic recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Histories overlap but nothing better matched: split brain,
	 * disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation whatsoever between the two data sets. */
	return -1000;
}
3116
/* drbd_sync_handshake() returns the new conn state on success, or
   C_MASK (-1) on failure.  (Comment fixed: the code returns C_MASK,
   not CONN_MASK.)

   Decides, while (re)connecting with a local disk attached, who becomes
   sync source/target, based on the UUID comparison plus the configured
   split-brain recovery policies.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* uuid_lock keeps both UUID sets stable while drbd_uuid_compare()
	 * examines (and possibly repairs) them. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		/* -1091/-1096: decision would need a newer protocol; see the
		 * legend above drbd_uuid_compare(). */
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		/* Exactly one side has usable data: that side is sync source
		 * regardless of the UUID verdict.  Keep "full sync" strength
		 * (|hg| == 2) if the UUIDs gave none or an ambiguous one. */
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		/* Split brain: run the auto-recovery policy matching the
		 * current number of primaries. */
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	if (hg == -100) {
		/* Still undecided: honor discard-my-data; the peer's
		 * corresponding setting arrives as bit 0 of p_uuid[UI_FLAGS]. */
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}
	/* Copy out of the rcu-protected net_conf before unlocking. */
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		/* We would have to overwrite data a primary is serving. */
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		/* dry-run connect: report what would happen, then bail out. */
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		/* |hg| == 2 means no usable common history: set all bits. */
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3275
Philipp Reisnerf179d762011-05-16 17:31:47 +02003276static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003277{
3278 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003279 if (peer == ASB_DISCARD_REMOTE)
3280 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281
3282 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003283 if (peer == ASB_DISCARD_LOCAL)
3284 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003285
3286 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003287 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003288}
3289
/*
 * receive_protocol() - handle a P_PROTOCOL or P_PROTOCOL_UPDATE packet
 * @connection: connection the packet arrived on
 * @pi: packet info; pi->data holds the struct p_protocol payload,
 *	pi->size the remaining on-the-wire bytes (the integrity alg name)
 *
 * Decodes the peer's connection settings.  For P_PROTOCOL (initial
 * handshake) the settings are compared against our own net_conf and any
 * mismatch leads to a disconnect.  For P_PROTOCOL_UPDATE (a running
 * connection being reconfigured) the compatibility checks are skipped.
 * In both cases a new net_conf reflecting the peer's values is installed
 * via RCU, together with the peer's data-integrity transform and the
 * digest buffers derived from it.
 *
 * Returns 0 on success, a negative error code on failure (after having
 * requested C_DISCONNECTING on the settings-mismatch/allocation paths).
 */
static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	/* Fixed-size part of the packet: all fields arrive big-endian. */
	p_proto		= be32_to_cpu(p->protocol);
	p_after_sb_0p	= be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p	= be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p	= be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf		= be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	/* Since protocol version 87 the packet carries the peer's
	 * data-integrity algorithm name as a trailing string. */
	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		/* force NUL termination, in case the peer did not */
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	/* Only the initial P_PROTOCOL packet is cross-checked against our
	 * local configuration; P_PROTOCOL_UPDATE just applies new values. */
	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		/* both sides requesting discard-my-data cannot be resolved */
		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell between which packets the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		/* scratch buffers for the incoming digest and for verification */
		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	/* data.mutex keeps the sender quiet while net_conf is swapped;
	 * conf_update serializes against local reconfiguration. */
	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	/* replace the previous integrity transform and digest buffers */
	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	/* wait for RCU readers of old_net_conf before freeing it */
	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	/* crypto_free_hash()/kfree() tolerate NULL, so partially set up
	 * state can be released unconditionally here. */
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3439
3440/* helper function
3441 * input: alg name, feature name
3442 * return: NULL (alg name was "")
3443 * ERR_PTR(error) if something goes wrong
3444 * or the crypto hash ptr, if it worked out ok. */
Lars Ellenberg8ce953a2014-02-27 09:46:18 +01003445static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003446 const char *alg, const char *name)
3447{
3448 struct crypto_hash *tfm;
3449
3450 if (!alg[0])
3451 return NULL;
3452
3453 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3454 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003455 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456 alg, name, PTR_ERR(tfm));
3457 return tfm;
3458 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003459 return tfm;
3460}
3461
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003462static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003463{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003464 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003465 int size = pi->size;
3466
3467 while (size) {
3468 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003469 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003470 if (s <= 0) {
3471 if (s < 0)
3472 return s;
3473 break;
3474 }
3475 size -= s;
3476 }
3477 if (size)
3478 return -EIO;
3479 return 0;
3480}
3481
/*
 * config_unknown_volume - device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	/* Drain the payload so the stream stays in sync for the next packet. */
	return ignore_remaining_packet(connection, pi);
}
3499
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003500static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003501{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003502 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003503 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003504 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003505 unsigned int header_size, data_size, exp_max_sz;
3506 struct crypto_hash *verify_tfm = NULL;
3507 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003508 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003509 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003510 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003511 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003512 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003513 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003515 peer_device = conn_peer_device(connection, pi->vnr);
3516 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003517 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003518 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003519
3520 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3521 : apv == 88 ? sizeof(struct p_rs_param)
3522 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003523 : apv <= 94 ? sizeof(struct p_rs_param_89)
3524 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003526 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003527 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003528 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003529 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003530 }
3531
3532 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003533 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003534 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003535 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003536 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003537 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003538 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003539 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003540 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003541 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003542 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003543 }
3544
3545 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003546 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003547 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3548
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003549 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003550 if (err)
3551 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003552
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003553 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003554 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003555 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003556 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3557 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003558 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003559 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003560 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003561 return -ENOMEM;
3562 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003563
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003564 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003565 *new_disk_conf = *old_disk_conf;
3566
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003567 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003568 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003569
3570 if (apv >= 88) {
3571 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003572 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003573 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003574 "peer wants %u, accepting only up to %u byte\n",
3575 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003576 err = -EIO;
3577 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003578 }
3579
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003580 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003581 if (err)
3582 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003583 /* we expect NUL terminated string */
3584 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003585 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003586 p->verify_alg[data_size-1] = 0;
3587
3588 } else /* apv >= 89 */ {
3589 /* we still expect NUL terminated strings */
3590 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003591 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3592 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3594 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3595 }
3596
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003597 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003598 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003599 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003600 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003601 goto disconnect;
3602 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003603 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604 p->verify_alg, "verify-alg");
3605 if (IS_ERR(verify_tfm)) {
3606 verify_tfm = NULL;
3607 goto disconnect;
3608 }
3609 }
3610
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003611 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003612 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003613 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003614 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003615 goto disconnect;
3616 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003617 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003618 p->csums_alg, "csums-alg");
3619 if (IS_ERR(csums_tfm)) {
3620 csums_tfm = NULL;
3621 goto disconnect;
3622 }
3623 }
3624
Philipp Reisner813472c2011-05-03 16:47:02 +02003625 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003626 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3627 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3628 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3629 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003630
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003631 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003632 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003633 new_plan = fifo_alloc(fifo_size);
3634 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003635 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003636 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003637 goto disconnect;
3638 }
3639 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003640 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003642 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003643 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3644 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003645 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003646 goto disconnect;
3647 }
3648
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003649 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003650
3651 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003652 strcpy(new_net_conf->verify_alg, p->verify_alg);
3653 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003654 crypto_free_hash(peer_device->connection->verify_tfm);
3655 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003656 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003657 }
3658 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003659 strcpy(new_net_conf->csums_alg, p->csums_alg);
3660 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003661 crypto_free_hash(peer_device->connection->csums_tfm);
3662 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003663 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003664 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003665 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003666 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667 }
3668
Philipp Reisner813472c2011-05-03 16:47:02 +02003669 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003670 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3671 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003672 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003673
3674 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003675 old_plan = device->rs_plan_s;
3676 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003677 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003678
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003679 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003680 synchronize_rcu();
3681 if (new_net_conf)
3682 kfree(old_net_conf);
3683 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003684 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003685
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003686 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003687
Philipp Reisner813472c2011-05-03 16:47:02 +02003688reconnect:
3689 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003690 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003691 kfree(new_disk_conf);
3692 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003693 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003694 return -EIO;
3695
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003697 kfree(new_plan);
3698 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003699 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003700 kfree(new_disk_conf);
3701 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003702 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703 /* just for completeness: actually not needed,
3704 * as this is not reached if csums_tfm was ok. */
3705 crypto_free_hash(csums_tfm);
3706 /* but free the verify_tfm again, if csums_tfm did not work out */
3707 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003708 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003709 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710}
3711
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003713static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003714 const char *s, sector_t a, sector_t b)
3715{
3716 sector_t d;
3717 if (a == 0 || b == 0)
3718 return;
3719 d = (a > b) ? (a - b) : (b - a);
3720 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003721 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003722 (unsigned long long)a, (unsigned long long)b);
3723}
3724
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003725static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003727 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003728 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003729 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003730 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003731 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003733 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003734
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003735 peer_device = conn_peer_device(connection, pi->vnr);
3736 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003737 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003738 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003739
Philipp Reisnerb411b362009-09-25 16:07:19 -07003740 p_size = be64_to_cpu(p->d_size);
3741 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003742 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003743
Philipp Reisnerb411b362009-09-25 16:07:19 -07003744 /* just store the peer's disk size for now.
3745 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003746 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003747
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003748 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003749 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003750 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003751 rcu_read_unlock();
3752
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003753 warn_if_differ_considerably(device, "lower level device sizes",
3754 p_size, drbd_get_max_capacity(device->ldev));
3755 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003756 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757
3758 /* if this is the first connect, or an otherwise expected
3759 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003760 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003761 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003762
3763 /* Never shrink a device with usable data during connect.
3764 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003765 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3766 drbd_get_capacity(device->this_bdev) &&
3767 device->state.disk >= D_OUTDATED &&
3768 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003769 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003770 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003771 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003772 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003774
3775 if (my_usize != p_usize) {
3776 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3777
3778 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3779 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003780 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003781 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003782 return -ENOMEM;
3783 }
3784
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003785 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003786 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003787 *new_disk_conf = *old_disk_conf;
3788 new_disk_conf->disk_size = p_usize;
3789
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003790 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003791 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003792 synchronize_rcu();
3793 kfree(old_disk_conf);
3794
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003795 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003796 (unsigned long)my_usize);
3797 }
3798
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003799 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003800 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003801
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003802 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003803 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3804 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3805 drbd_reconsider_max_bio_size(), we can be sure that after
3806 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3807
Philipp Reisnere89b5912010-03-24 17:11:33 +01003808 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003809 if (get_ldev(device)) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003810 drbd_reconsider_max_bio_size(device, device->ldev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003811 dd = drbd_determine_dev_size(device, ddsf, NULL);
3812 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003813 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003814 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003815 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003816 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003817 /*
3818 * I am diskless, need to accept the peer's *current* size.
3819 * I must NOT accept the peers backing disk size,
3820 * it may have been larger than mine all along...
3821 *
3822 * At this point, the peer knows more about my disk, or at
3823 * least about what we last agreed upon, than myself.
3824 * So if his c_size is less than his d_size, the most likely
3825 * reason is that *my* d_size was smaller last time we checked.
3826 *
3827 * However, if he sends a zero current size,
3828 * take his (user-capped or) backing disk size anyways.
3829 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003830 drbd_reconsider_max_bio_size(device, NULL);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003831 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832 }
3833
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003834 if (get_ldev(device)) {
3835 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3836 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003837 ldsc = 1;
3838 }
3839
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003840 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003841 }
3842
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003843 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003844 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003845 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846 /* we have different sizes, probably peer
3847 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003848 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003850 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3851 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3852 if (device->state.pdsk >= D_INCONSISTENT &&
3853 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003854 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003855 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003856 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003857 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003858 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003859 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003860 }
3861 }
3862
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003863 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003864}
3865
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003866static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003867{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003868 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003869 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003870 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003872 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003873
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003874 peer_device = conn_peer_device(connection, pi->vnr);
3875 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003876 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003877 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003878
Philipp Reisnerb411b362009-09-25 16:07:19 -07003879 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003880 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003881 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003882 return false;
3883 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003884
3885 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3886 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3887
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003888 kfree(device->p_uuid);
3889 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003891 if (device->state.conn < C_CONNECTED &&
3892 device->state.disk < D_INCONSISTENT &&
3893 device->state.role == R_PRIMARY &&
3894 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003895 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003896 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003897 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003898 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003899 }
3900
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003901 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003902 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003903 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003904 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003905 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003906 (p_uuid[UI_FLAGS] & 8);
3907 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003908 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003909 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003910 "clear_n_write from receive_uuids",
3911 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003912 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3913 _drbd_uuid_set(device, UI_BITMAP, 0);
3914 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003916 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003917 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003918 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003919 put_ldev(device);
3920 } else if (device->state.disk < D_INCONSISTENT &&
3921 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003922 /* I am a diskless primary, the peer just created a new current UUID
3923 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003924 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003925 }
3926
3927 /* Before we test for the disk state, we should wait until an eventually
3928 ongoing cluster wide state change is finished. That is important if
3929 we are primary and are detaching from our disk. We need to see the
3930 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003931 mutex_lock(device->state_mutex);
3932 mutex_unlock(device->state_mutex);
3933 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3934 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003935
3936 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003937 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003939 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003940}
3941
3942/**
3943 * convert_state() - Converts the peer's view of the cluster state to our point of view
3944 * @ps: The state as seen by the peer.
3945 */
3946static union drbd_state convert_state(union drbd_state ps)
3947{
3948 union drbd_state ms;
3949
3950 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003951 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003952 [C_CONNECTED] = C_CONNECTED,
3953
3954 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3955 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3956 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3957 [C_VERIFY_S] = C_VERIFY_T,
3958 [C_MASK] = C_MASK,
3959 };
3960
3961 ms.i = ps.i;
3962
3963 ms.conn = c_tab[ps.conn];
3964 ms.peer = ps.role;
3965 ms.role = ps.peer;
3966 ms.pdsk = ps.disk;
3967 ms.disk = ps.pdsk;
3968 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3969
3970 return ms;
3971}
3972
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003973static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003974{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003975 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003976 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003977 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003978 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003979 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003980
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003981 peer_device = conn_peer_device(connection, pi->vnr);
3982 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003983 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003984 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003985
Philipp Reisnerb411b362009-09-25 16:07:19 -07003986 mask.i = be32_to_cpu(p->mask);
3987 val.i = be32_to_cpu(p->val);
3988
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003989 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003990 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003991 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003992 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993 }
3994
3995 mask = convert_state(mask);
3996 val = convert_state(val);
3997
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003998 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003999 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004000
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004001 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004002
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004003 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004004}
4005
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004006static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004008 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004009 union drbd_state mask, val;
4010 enum drbd_state_rv rv;
4011
4012 mask.i = be32_to_cpu(p->mask);
4013 val.i = be32_to_cpu(p->val);
4014
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004015 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4016 mutex_is_locked(&connection->cstate_mutex)) {
4017 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004018 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004019 }
4020
4021 mask = convert_state(mask);
4022 val = convert_state(val);
4023
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004024 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4025 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004026
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004027 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004028}
4029
/*
 * receive_state() - handle a P_STATE packet announcing the peer's state.
 *
 * Merges the peer's view (role, disk, connection) into our own state,
 * possibly starting a resync via drbd_sync_handshake().  Uses an
 * optimistic read/retry scheme around req_lock: the state is sampled,
 * evaluated unlocked, and re-checked under the lock before committing.
 *
 * Returns 0 on success, -ECONNRESET if the connection is already being
 * torn down, or -EIO on fatal state conflicts (tears down the connection).
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is still attaching (D_NEGOTIATING), derive its
	 * effective disk state from the inconsistent flag in its UUIDs. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* sample our state; re-checked under the lock before committing below */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	/* receiving a state packet completes the handshake */
	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer being Ahead makes us the Behind side of the pair */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from the handshake means "no usable common ancestor" */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* someone changed our state while we evaluated it unlocked: redo */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4207
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004208static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004209{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004210 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004211 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004212 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004213
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004214 peer_device = conn_peer_device(connection, pi->vnr);
4215 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004216 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004217 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004218
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004219 wait_event(device->misc_wait,
4220 device->state.conn == C_WF_SYNC_UUID ||
4221 device->state.conn == C_BEHIND ||
4222 device->state.conn < C_CONNECTED ||
4223 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004224
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004225 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004226
Philipp Reisnerb411b362009-09-25 16:07:19 -07004227 /* Here the _drbd_uuid_ functions are right, current should
4228 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004229 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4230 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4231 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004232
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004233 drbd_print_uuids(device, "updated sync uuid");
4234 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004235
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004236 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004238 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004239
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004240 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004241}
4242
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004243/**
4244 * receive_bitmap_plain
4245 *
4246 * Return 0 when done, 1 when another iteration is needed, and a negative error
4247 * code upon failure.
4248 */
4249static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004250receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004251 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004253 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004254 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004255 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004256 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004257 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004258 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004259
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004260 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004261 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004262 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004263 }
4264 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004265 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004266 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004267 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004268 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004269
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004270 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004271
4272 c->word_offset += num_words;
4273 c->bit_offset = c->word_offset * BITS_PER_LONG;
4274 if (c->bit_offset > c->bm_bits)
4275 c->bit_offset = c->bm_bits;
4276
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004277 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278}
4279
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004280static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4281{
4282 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4283}
4284
4285static int dcbp_get_start(struct p_compressed_bm *p)
4286{
4287 return (p->encoding & 0x80) != 0;
4288}
4289
4290static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4291{
4292 return (p->encoding >> 4) & 0x7;
4293}
4294
/**
 * recv_bm_rle_bits
 *
 * Decode a VLI/RLE compressed bitmap chunk from @p and apply the runs of
 * set bits to our bitmap.  The stream alternates runs of clear and set
 * bits; @toggle tracks which kind the current run is, starting as encoded
 * in the packet header.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* up to 64 not-yet-decoded bits from the stream */
	u64 rl;		/* run length decoded from the look-ahead window */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* current absolute bit position */
	unsigned long e;			/* end bit of the current run */
	int toggle = dcbp_get_start(p);
	int have;	/* number of valid bits in look_ahead */
	int bits;
	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window with the first 64 bits */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* run must not extend past the end of our bitmap */
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* a code longer than the remaining window means a corrupt stream */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window back up to 64 bits */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* done only once the stream has covered the whole bitmap */
	return (s != c->bm_bits);
}
4363
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004364/**
4365 * decode_bitmap_c
4366 *
4367 * Return 0 when done, 1 when another iteration is needed, and a negative error
4368 * code upon failure.
4369 */
4370static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004371decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004373 struct bm_xfer_ctx *c,
4374 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004376 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004377 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378
4379 /* other variants had been implemented for evaluation,
4380 * but have been dropped as this one turned out to be "best"
4381 * during all our tests. */
4382
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004383 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4384 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004385 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004386}
4387
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004388void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004389 const char *direction, struct bm_xfer_ctx *c)
4390{
4391 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004392 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004393 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4394 unsigned int plain =
4395 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4396 c->bm_words * sizeof(unsigned long);
4397 unsigned int total = c->bytes[0] + c->bytes[1];
4398 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004399
4400 /* total can not be zero. but just in case: */
4401 if (total == 0)
4402 return;
4403
4404 /* don't report if not compressed */
4405 if (total >= plain)
4406 return;
4407
4408 /* total < plain. check for overflow, still */
4409 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4410 : (1000 * total / plain);
4411
4412 if (r > 1000)
4413 r = 1000;
4414
4415 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004416 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004417 "total %u; compression: %u.%u%%\n",
4418 direction,
4419 c->bytes[1], c->packets[1],
4420 c->bytes[0], c->packets[0],
4421 total, r/10, r % 10);
4422}
4423
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we would use big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* SET_ALLOWED: the sync handshake may legitimately set bits while we
	 * hold the bitmap lock here. */
	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	/* transfer context: tracks progress and per-encoding statistics */
	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* Loop over one or more P_BITMAP / P_COMPRESSED_BITMAP packets until
	 * the whole bitmap has been received (handler returns 0) or an error
	 * occurs (handler returns < 0; 1 means "more packets expected"). */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			/* payload must fit into the pre-allocated receive buffer */
			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			/* must carry at least one byte of code stream beyond the header */
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
			       goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* accounting for INFO_bm_xfer_stats: index 1 = plain, 0 = RLE */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;
		}
		/* more to come: fetch the next packet header */
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* we are SyncTarget: answer with our own bitmap, then proceed */
		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	/* as SyncSource, kick off the resync only after releasing the bitmap lock */
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}
4519
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004520static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004521{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004522 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004523 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004524
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004525 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004526}
4527
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004528static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004529{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004530 /* Make sure we've acked all the TCP data associated
4531 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004532 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004534 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004535}
4536
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004537static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004538{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004539 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004540 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004541 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004542
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004543 peer_device = conn_peer_device(connection, pi->vnr);
4544 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004545 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004546 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004547
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004548 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004549 case C_WF_SYNC_UUID:
4550 case C_WF_BITMAP_T:
4551 case C_BEHIND:
4552 break;
4553 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004554 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004555 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004556 }
4557
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004558 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004559
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004560 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004561}
4562
Philipp Reisner02918be2010-08-20 14:35:10 +02004563struct data_cmd {
4564 int expect_payload;
4565 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004566 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004567};
4568
Philipp Reisner02918be2010-08-20 14:35:10 +02004569static struct data_cmd drbd_cmd_handler[] = {
4570 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4571 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4572 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4573 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004574 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4575 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4576 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004577 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4578 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004579 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4580 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004581 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4582 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4583 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4584 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4585 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4586 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4587 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4588 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4589 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4590 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004591 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004592 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004593 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004594 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004595};
4596
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004597static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004598{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004599 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004600 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004601 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004603 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004604 struct data_cmd *cmd;
4605
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004606 drbd_thread_current_set_cpu(&connection->receiver);
Lars Ellenberg944410e2014-05-06 15:02:05 +02004607 update_receiver_timing_details(connection, drbd_recv_header);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004608 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004609 goto err_out;
4610
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004611 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004612 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004613 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004614 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004615 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004616 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004617
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004618 shs = cmd->pkt_size;
4619 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004620 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004621 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004622 goto err_out;
4623 }
4624
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004625 if (shs) {
Lars Ellenberg944410e2014-05-06 15:02:05 +02004626 update_receiver_timing_details(connection, drbd_recv_all_warn);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004627 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004628 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004629 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004630 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004631 }
4632
Lars Ellenberg944410e2014-05-06 15:02:05 +02004633 update_receiver_timing_details(connection, cmd->fn);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004634 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004635 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004636 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004637 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004638 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004639 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004641 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004642
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004643 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004644 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004645}
4646
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004647static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004648{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004649 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004650 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004651 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004652
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004653 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004654 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004655
Lars Ellenberg545752d2011-12-05 14:39:25 +01004656 /* We are about to start the cleanup after connection loss.
4657 * Make sure drbd_make_request knows about that.
4658 * Usually we should be in some network failure state already,
4659 * but just in case we are not, we fix it up here.
4660 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004661 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004662
Philipp Reisnerb411b362009-09-25 16:07:19 -07004663 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004664 drbd_thread_stop(&connection->asender);
4665 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004666
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004667 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004668 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4669 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004670 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004671 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004672 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004673 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004674 rcu_read_lock();
4675 }
4676 rcu_read_unlock();
4677
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004678 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004679 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004680 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004681 atomic_set(&connection->current_epoch->epoch_size, 0);
4682 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004683
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004684 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004685
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004686 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4687 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004688
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004689 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004690 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004691 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004692 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004693
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004694 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004695
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004696 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004697 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004698}
4699
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004700static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004701{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004702 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004703 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004704
Philipp Reisner85719572010-07-21 10:20:17 +02004705 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004706 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004707 _drbd_wait_ee_list_empty(device, &device->active_ee);
4708 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4709 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004710 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004711
4712 /* We do not have data structures that would allow us to
4713 * get the rs_pending_cnt down to 0 again.
4714 * * On C_SYNC_TARGET we do not have any data structures describing
4715 * the pending RSDataRequest's we have sent.
4716 * * On C_SYNC_SOURCE there is no data structure that tracks
4717 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4718 * And no, it is not the sum of the reference counts in the
4719 * resync_LRU. The resync_LRU tracks the whole operation including
4720 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4721 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004722 drbd_rs_cancel_all(device);
4723 device->rs_total = 0;
4724 device->rs_failed = 0;
4725 atomic_set(&device->rs_pending_cnt, 0);
4726 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004727
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004728 del_timer_sync(&device->resync_timer);
4729 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004730
Philipp Reisnerb411b362009-09-25 16:07:19 -07004731 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4732 * w_make_resync_request etc. which may still be on the worker queue
4733 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004734 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004736 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004737
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004738 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4739 might have issued a work again. The one before drbd_finish_peer_reqs() is
4740 necessary to reclain net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004741 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004742
Lars Ellenberg08332d72012-08-17 15:09:13 +02004743 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4744 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004745 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004746
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004747 kfree(device->p_uuid);
4748 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004749
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004750 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004751 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004752
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004753 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004754
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004755 /* serialize with bitmap writeout triggered by the state change,
4756 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004757 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004758
Philipp Reisnerb411b362009-09-25 16:07:19 -07004759 /* tcp_close and release of sendpage pages can be deferred. I don't
4760 * want to use SO_LINGER, because apparently it can be deferred for
4761 * more than 20 seconds (longest time I checked).
4762 *
4763 * Actually we don't care for exactly when the network stack does its
4764 * put_page(), but release our reference on these pages right here.
4765 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004766 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004767 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004768 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004769 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004770 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004771 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004772 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004773 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004774 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004775
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004776 D_ASSERT(device, list_empty(&device->read_ee));
4777 D_ASSERT(device, list_empty(&device->active_ee));
4778 D_ASSERT(device, list_empty(&device->sync_ee));
4779 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004780
Philipp Reisner360cc742011-02-08 14:29:53 +01004781 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004782}
4783
4784/*
4785 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4786 * we can agree on is stored in agreed_pro_version.
4787 *
4788 * feature flags and the reserved array should be enough room for future
4789 * enhancements of the handshake protocol, and possible plugins...
4790 *
4791 * for now, they are expected to be zero, but ignored.
4792 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004793static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004794{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004795 struct drbd_socket *sock;
4796 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004797
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004798 sock = &connection->data;
4799 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004800 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004801 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004802 memset(p, 0, sizeof(*p));
4803 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4804 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004805 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004806 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004807}
4808
4809/*
4810 * return values:
4811 * 1 yes, we have a valid connection
4812 * 0 oops, did not work out, please try again
4813 * -1 peer talks different language,
4814 * no point in trying again, please go standalone.
4815 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004816static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004817{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004818 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004819 struct p_connection_features *p;
4820 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004821 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004822 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004823
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004824 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004825 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004826 return 0;
4827
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004828 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004829 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004830 return 0;
4831
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004832 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004833 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004834 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004835 return -1;
4836 }
4837
Philipp Reisner77351055b2011-02-07 17:24:26 +01004838 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004839 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004840 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004841 return -1;
4842 }
4843
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004844 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004845 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004846 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004847 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848
Philipp Reisnerb411b362009-09-25 16:07:19 -07004849 p->protocol_min = be32_to_cpu(p->protocol_min);
4850 p->protocol_max = be32_to_cpu(p->protocol_max);
4851 if (p->protocol_max == 0)
4852 p->protocol_max = p->protocol_min;
4853
4854 if (PRO_VERSION_MAX < p->protocol_min ||
4855 PRO_VERSION_MIN > p->protocol_max)
4856 goto incompat;
4857
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004858 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004859 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004860
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004861 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004862 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004863
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004864 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4865 connection->agreed_features & FF_TRIM ? " " : " not ");
4866
Philipp Reisnerb411b362009-09-25 16:07:19 -07004867 return 1;
4868
4869 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004870 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004871 "I support %d-%d, peer supports %d-%d\n",
4872 PRO_VERSION_MIN, PRO_VERSION_MAX,
4873 p->protocol_min, p->protocol_max);
4874 return -1;
4875}
4876
4877#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: cram-hmac based peer
 * authentication can never succeed, so fail permanently (-1: don't retry). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* Fix grammar in user-visible message: "was build" -> "was built". */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4884#else
4885#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004886
4887/* Return value:
4888 1 - auth succeeded,
4889 0 - failed, try again (network error),
4890 -1 - auth failed, don't try again.
4891*/
4892
/* CRAM-HMAC mutual challenge/response authentication over the data socket.
 * Each side sends a random challenge; the peer must answer with the HMAC of
 * that challenge, keyed with the configured shared secret.
 * (Return values are documented in the comment above this function.) */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer. */

	/* Copy the shared secret out under RCU; net_conf may be replaced. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	/* NOTE(review): 'secret' (and 'peers_ch'/'response' below) hold key
	 * material and are left on the stack / kfree()d without being zeroed
	 * first - consider memzero_explicit() before return/kfree(). */
	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	/* Send our challenge ... */
	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	/* ... and receive the peer's challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* Bound the peer-supplied payload size before kmalloc(). */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Refuse a reflected challenge - otherwise the peer could replay our
	 * own response back to us instead of proving knowledge of the secret. */
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	/* HMAC the peer's challenge and send the digest as our response. */
	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	/* Receive the peer's response to our challenge. */
	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* Compute the expected response over our own challenge and compare. */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): plain memcmp() is not constant-time; a timing-safe
	 * compare (e.g. crypto_memneq) would be preferable here. */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
		     resp_size);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so the early-error paths are safe. */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
5066#endif
5067
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02005068int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005069{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005070 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005071 int h;
5072
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005073 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005074
5075 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005076 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005077 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005078 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01005079 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005080 }
5081 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005082 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005083 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005084 }
5085 } while (h == 0);
5086
Philipp Reisner91fd4da2011-04-20 17:47:29 +02005087 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005088 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005089
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005090 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005091
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005092 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005093 return 0;
5094}
5095
5096/* ********* acknowledge sender ******** */
5097
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005098static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005099{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005100 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005101 int retcode = be32_to_cpu(p->retcode);
5102
5103 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005104 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005105 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005106 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005107 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005108 drbd_set_st_err_str(retcode), retcode);
5109 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005110 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005111
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005112 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005113}
5114
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005115static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005116{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005117 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005118 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005119 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005120 int retcode = be32_to_cpu(p->retcode);
5121
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005122 peer_device = conn_peer_device(connection, pi->vnr);
5123 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005124 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005125 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005126
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005127 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005128 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005129 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01005130 }
5131
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005132 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005133 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005134 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005135 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005136 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005137 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005138 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005139 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005140
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005141 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142}
5143
/* P_PING: answer the peer's keep-alive probe with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}
5149
/* P_PING_ACK: the peer answered our keep-alive ping.  Restore the meta
 * socket's receive timeout to the configured ping interval and wake anyone
 * waiting on ping_wait (only on the first ack since the flag was cleared). */
static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}
5159
/* P_RS_IS_IN_SYNC: the peer reports that a requested resync block is already
 * in sync (checksum matched, see rs_same_csum accounting below), so mark it
 * in sync locally without any data transfer. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* This packet only exists for protocol version 89 and later. */
	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Only touch bitmap/resync state while holding a local-disk reference. */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* Account the block (in 512-byte sectors) as resync traffic received. */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5189
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005190static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005191validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005192 struct rb_root *root, const char *func,
5193 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005194{
5195 struct drbd_request *req;
5196 struct bio_and_error m;
5197
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005198 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005199 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005200 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005201 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005202 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005203 }
5204 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005205 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005206
5207 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005208 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005209 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005210}
5211
/* Positive ack for a write.  Resync writes (ID_SYNCER) are handled directly;
 * application writes are mapped from the ack packet type to the matching
 * request state transition and applied via the transfer log. */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* Resync request: not tracked as a drbd_request, just update
		 * the bitmap and resync bookkeeping. */
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	/* Map packet type -> request event; any other command reaching this
	 * handler is a programming error. */
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5257
/* Negative ack: the peer could not write the block.  For resync requests
 * record the failure; for application writes apply NEG_ACKED and, if the
 * request is already gone, mark the range out of sync so it is resynced. */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* Failed resync write: account it as a resync failure. */
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	/* missing_ok=true: see the protocol notes below. */
	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}
5293
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005294static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005295{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005296 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005297 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005298 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005299 sector_t sector = be64_to_cpu(p->sector);
5300
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005301 peer_device = conn_peer_device(connection, pi->vnr);
5302 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005303 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005304 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005305
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005306 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005307
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005308 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005309 (unsigned long long)sector, be32_to_cpu(p->blksize));
5310
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005311 return validate_req_change_req_state(device, p->block_id, sector,
5312 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005313 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005314}
5315
/* Negative reply to a resync/online-verify read request: the peer could not
 * read the requested block (or cancelled the request). */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through - P_RS_CANCEL needs no extra work
			 * beyond drbd_rs_complete_io() above. */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5351
/* Barrier ack: the peer has processed all writes up to the given barrier.
 * Release that epoch from the transfer log, and for each device sitting in
 * C_AHEAD with no application I/O in flight, arm the start_resync timer to
 * transition it towards becoming a sync source. */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* Walk all peer devices of this connection under RCU. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* test_and_set_bit ensures the timer is armed only once. */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5375
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005376static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005377{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005378 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005379 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005380 struct p_block_ack *p = pi->data;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005381 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005382 sector_t sector;
5383 int size;
5384
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005385 peer_device = conn_peer_device(connection, pi->vnr);
5386 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005387 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005388 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005389
Philipp Reisnerb411b362009-09-25 16:07:19 -07005390 sector = be64_to_cpu(p->sector);
5391 size = be32_to_cpu(p->blksize);
5392
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005393 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005394
5395 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005396 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005397 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005398 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005400 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005401 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005402
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005403 drbd_rs_complete_io(device, sector);
5404 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005405
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005406 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005407
5408 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005409 if ((device->ov_left & 0x200) == 0x200)
5410 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005411
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005412 if (device->ov_left == 0) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005413 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5414 if (dw) {
5415 dw->w.cb = w_ov_finished;
5416 dw->device = device;
5417 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005418 } else {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005419 drbd_err(device, "kmalloc(dw) failed.");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005420 ov_out_of_sync_print(device);
5421 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005422 }
5423 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005424 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005425 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005426}
5427
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005428static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005429{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005430 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005431}
5432
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005433static int connection_finish_peer_reqs(struct drbd_connection *connection)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005434{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005435 struct drbd_peer_device *peer_device;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005436 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005437
5438 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005439 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner32862ec2011-02-08 16:41:01 +01005440 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005441
5442 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005443 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5444 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005445 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005446 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005447 if (drbd_finish_peer_reqs(device)) {
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005448 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005449 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005450 }
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005451 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005452 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005453 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005454 set_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005455
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005456 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005457 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5458 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005459 not_empty = !list_empty(&device->done_ee);
Philipp Reisner082a3432011-03-15 16:05:42 +01005460 if (not_empty)
5461 break;
5462 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005463 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005464 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005465 } while (not_empty);
5466
5467 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005468}
5469
/* Dispatch-table entry for packets received on the meta (asender) socket. */
struct asender_cmd {
	size_t pkt_size;	/* expected payload size, excluding the header */
	/* Handler: returns 0 on success; nonzero makes drbd_asender reconnect. */
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5474
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005475static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005476 [P_PING] = { 0, got_Ping },
5477 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005478 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5479 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5480 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005481 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005482 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5483 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005484 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005485 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5486 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5487 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5488 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005489 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005490 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5491 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5492 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005493};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005494
5495int drbd_asender(struct drbd_thread *thi)
5496{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005497 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005498 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005499 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005500 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005501 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005502 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005503 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005504 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005505 bool ping_timeout_active = false;
5506 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005507 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005508 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005509
Philipp Reisner3990e042013-03-27 14:08:48 +01005510 rv = sched_setscheduler(current, SCHED_RR, &param);
5511 if (rv < 0)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005512 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005513
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005514 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005515 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005516
5517 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005518 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005519 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005520 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005521 ping_int = nc->ping_int;
5522 rcu_read_unlock();
5523
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005524 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5525 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005526 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005527 goto reconnect;
5528 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005529 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005530 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005531 }
5532
Philipp Reisner32862ec2011-02-08 16:41:01 +01005533 /* TODO: conditionally cork; it may hurt latency if we cork without
5534 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005535 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005536 drbd_tcp_cork(connection->meta.socket);
5537 if (connection_finish_peer_reqs(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005538 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005539 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005540 }
5541 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005542 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005543 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005544
5545 /* short circuit, recv_msg would return EINTR anyways. */
5546 if (signal_pending(current))
5547 continue;
5548
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005549 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5550 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005551
5552 flush_signals(current);
5553
5554 /* Note:
5555 * -EINTR (on meta) we got a signal
5556 * -EAGAIN (on meta) rcvtimeo expired
5557 * -ECONNRESET other side closed the connection
5558 * -ERESTARTSYS (on data) we got a signal
5559 * rv < 0 other than above: unexpected error!
5560 * rv == expected: full header or command
5561 * rv < expected: "woken" by signal during receive
5562 * rv == 0 : "connection shut down by peer"
5563 */
Lars Ellenbergabde9cc2014-09-11 14:29:11 +02005564received_more:
Philipp Reisnerb411b362009-09-25 16:07:19 -07005565 if (likely(rv > 0)) {
5566 received += rv;
5567 buf += rv;
5568 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005569 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005570 long t;
5571 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005572 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005573 rcu_read_unlock();
5574
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005575 t = wait_event_timeout(connection->ping_wait,
5576 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005577 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005578 if (t)
5579 break;
5580 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005581 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005582 goto reconnect;
5583 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005584 /* If the data socket received something meanwhile,
5585 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005586 if (time_after(connection->last_received,
5587 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005588 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005589 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005590 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005591 goto reconnect;
5592 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005593 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005594 continue;
5595 } else if (rv == -EINTR) {
5596 continue;
5597 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005598 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005599 goto reconnect;
5600 }
5601
5602 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005603 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005604 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005605 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005606 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005607 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005608 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005609 goto disconnect;
5610 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005611 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005612 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005613 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005614 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005615 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005616 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005617 }
5618 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005619 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005620
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005621 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005622 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005623 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005624 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005625 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005626
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005627 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005628
Philipp Reisner44ed1672011-04-19 17:10:19 +02005629 if (cmd == &asender_tbl[P_PING_ACK]) {
5630 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005631 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005632 ping_timeout_active = false;
5633 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005634
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005635 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005636 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005637 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005638 cmd = NULL;
5639 }
Lars Ellenbergabde9cc2014-09-11 14:29:11 +02005640 if (test_bit(SEND_PING, &connection->flags))
5641 continue;
5642 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, MSG_DONTWAIT);
5643 if (rv > 0)
5644 goto received_more;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005645 }
5646
5647 if (0) {
5648reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005649 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5650 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005651 }
5652 if (0) {
5653disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005654 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005655 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005656 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005657
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005658 drbd_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005659
5660 return 0;
5661}