/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */
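
/*
 * An illustrative sketch (not part of the driver logic): a chain of three
 * pages a -> b -> c is represented as
 *
 *	a->private == (unsigned long)b
 *	b->private == (unsigned long)c
 *	c->private == 0			(end-of-chain marker)
 *
 * page_chain_next() is assumed to simply cast page->private back to a
 * struct page pointer; the 0 terminator is what set_page_private(page, 0)
 * establishes in page_chain_del() below.
 */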

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

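/* Prepend the chain [chain_first .. chain_last] to *head.  chain_last must
 * be the tail of chain_first's chain; the #if 1 block below sanity-checks
 * that with page_chain_tail(). */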
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

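/* Grab @number pages off the preallocated drbd_pp_pool if it has enough
 * vacant pages; otherwise allocate them one by one with GFP_TRY.  If that
 * only partially succeeds, give the partial chain back to the pool and
 * return NULL (the caller, drbd_alloc_pages(), will retry "soon"). */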
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list.  Since
	   they are sent in order over the wire, they have to finish
	   in order.  As soon as we see the first one that is not
	   finished, we can stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
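
/*
 * Usage sketch (hypothetical caller; see drbd_alloc_peer_req() below for a
 * real one).  With retry=true, a NULL return only happens when the caller
 * is interrupted by a signal:
 *
 *	struct page *page = drbd_alloc_pages(peer_device, nr_pages, true);
 *	if (!page)
 *		return NULL;
 *	...
 *	drbd_free_pages(peer_device->device, page, 0);
 */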

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * It is also called from inside another spin_lock_irq(&resource->req_lock)
 * critical section.
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
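
/*
 * A sketch of that contract (hypothetical caller, assuming the usual
 * device/resource pointers are at hand):
 *
 *	spin_lock_irq(&device->resource->req_lock);
 *	_drbd_wait_ee_list_empty(device, &device->done_ee);
 *	spin_unlock_irq(&device->resource->req_lock);
 *
 *	drbd_free_peer_reqs(device, &device->net_ee);	(takes req_lock itself)
 */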

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
453
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200454static void _drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200455 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700456{
457 DEFINE_WAIT(wait);
458
459 /* avoids spin_lock/unlock
460 * and calling prepare_to_wait in the fast path */
461 while (!list_empty(head)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200462 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200463 spin_unlock_irq(&device->resource->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100464 io_schedule();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200465 finish_wait(&device->ee_wait, &wait);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200466 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700467 }
468}
469
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200470static void drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200471 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700472{
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200473 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200474 _drbd_wait_ee_list_empty(device, head);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200475 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700476}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
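	/* if no flags were passed in, default to MSG_WAITALL, i.e. block
	 * until the full size has arrived (or the connection failed),
	 * plus MSG_NOSIGNAL */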
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
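
/* Accordingly, drbd_setbufsize() is called before connect() in
 * drbd_try_connect() and before listen() in prepare_listen_socket() below. */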

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);
};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter (+/- timeo/7) */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
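/*
 * A sketch of how the caller (the receiver thread) is assumed to react to
 * those return values (hypothetical, simplified):
 *
 *	int h;
 *
 *	do {
 *		h = conn_connect(connection);
 *	} while (h == 0);	(0: handshake failed, try again)
 *	if (h < 0)
 *		...		(-1/-2: give up, go standalone)
 */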
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better. */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

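	/* Both nodes connect out and listen at the same time; two TCP
	 * connections are established per peer: "sock" carries bulk data
	 * (announced with P_INITIAL_DATA), "msock" carries meta data and
	 * acks (announced with P_INITIAL_META).  Crossed connection
	 * attempts are sorted out below. */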
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200976 if (ad.s_listen)
977 sock_release(ad.s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700978
Philipp Reisner98683652012-11-09 14:18:43 +0100979 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
980 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981
Philipp Reisner7da35862011-12-19 22:42:56 +0100982 sock.socket->sk->sk_allocation = GFP_NOIO;
983 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700984
Philipp Reisner7da35862011-12-19 22:42:56 +0100985 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
986 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987
Philipp Reisnerb411b362009-09-25 16:07:19 -0700988 /* NOT YET ...
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200989 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
Philipp Reisner7da35862011-12-19 22:42:56 +0100990 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Andreas Gruenbacher60381782011-03-28 17:05:50 +0200991 * first set it to the P_CONNECTION_FEATURES timeout,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992 * which we set to 4x the configured ping_timeout. */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200993 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200994 nc = rcu_dereference(connection->net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700995
Philipp Reisner7da35862011-12-19 22:42:56 +0100996 sock.socket->sk->sk_sndtimeo =
997 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200998
Philipp Reisner7da35862011-12-19 22:42:56 +0100999 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001000 timeout = nc->timeout * HZ / 10;
Philipp Reisner08b165b2011-09-05 16:22:33 +02001001 discard_my_data = nc->discard_my_data;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001002 rcu_read_unlock();
1003
Philipp Reisner7da35862011-12-19 22:42:56 +01001004 msock.socket->sk->sk_sndtimeo = timeout;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001005
1006 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001007 * we use TCP_CORK where appropriate, though */
Philipp Reisner7da35862011-12-19 22:42:56 +01001008 drbd_tcp_nodelay(sock.socket);
1009 drbd_tcp_nodelay(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001011 connection->data.socket = sock.socket;
1012 connection->meta.socket = msock.socket;
1013 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001015 h = drbd_do_features(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001016 if (h <= 0)
1017 return h;
1018
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001019 if (connection->cram_hmac_tfm) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001020 /* drbd_request_state(device, NS(conn, WFAuth)); */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001021 switch (drbd_do_auth(connection)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +01001022 case -1:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001023 drbd_err(connection, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001024 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +01001025 case 0:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001026 drbd_err(connection, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01001027 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001028 }
1029 }
1030
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001031 connection->data.socket->sk->sk_sndtimeo = timeout;
1032 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001034 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +02001035 return -1;
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001036
Philipp Reisner31007742014-04-28 18:43:12 +02001037 /* Prevent a race between resync-handshake and
1038 * being promoted to Primary.
1039 *
1040 * Grab and release the state mutex, so we know that any current
1041 * drbd_set_role() is finished, and any incoming drbd_set_role
1042 * will see the STATE_SENT flag, and wait for it to be cleared.
1043 */
1044 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1045 mutex_lock(peer_device->device->state_mutex);
1046
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001047 set_bit(STATE_SENT, &connection->flags);
Philipp Reisner197296f2012-03-26 16:47:11 +02001048
Philipp Reisner31007742014-04-28 18:43:12 +02001049 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1050 mutex_unlock(peer_device->device->state_mutex);
1051
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001052 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001053 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1054 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001055 kref_get(&device->kref);
Andreas Gruenbacher26ea8f92013-06-25 16:50:03 +02001056 rcu_read_unlock();
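		/* The kref taken above pins the device outside the RCU
		 * read-side critical section, so drbd_connected() below may
		 * sleep safely. */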
1057
Philipp Reisner08b165b2011-09-05 16:22:33 +02001058 if (discard_my_data)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001059 set_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001060 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001061 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001062
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001063 drbd_connected(peer_device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001064 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001065 rcu_read_lock();
1066 }
1067 rcu_read_unlock();
1068
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001069 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1070 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1071 clear_bit(STATE_SENT, &connection->flags);
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001072 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001073 }
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001074
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001075 drbd_thread_start(&connection->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001077 mutex_lock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001078 /* The discard_my_data flag is a single-shot modifier to the next
1079 * connection attempt, the handshake of which is now well underway.
1080 * No need for rcu style copying of the whole struct
1081 * just to clear a single value. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001082 connection->net_conf->discard_my_data = 0;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001083 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001084
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001085 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086
1087out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001088 if (ad.s_listen)
1089 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001090 if (sock.socket)
1091 sock_release(sock.socket);
1092 if (msock.socket)
1093 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001094 return -1;
1095}
1096
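/* On-the-wire header variants, as decoded below: protocol 100 uses the
 * 16-byte p_header100 (32-bit magic, 16-bit volume number, 16-bit command,
 * 32-bit length, plus padding that must be zero); protocol 95 uses
 * p_header95 (16-bit magic, 16-bit command, 32-bit length); the original
 * p_header80 has a 32-bit magic, 16-bit command and 16-bit length.
 * The older two formats carry no volume number, so vnr defaults to 0.
 * Field order here is sketched from the accessors below; see
 * drbd_protocol.h for the authoritative layout. */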
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001097static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001099 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001101 if (header_size == sizeof(struct p_header100) &&
1102 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1103 struct p_header100 *h = header;
1104 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001105 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001106 return -EINVAL;
1107 }
1108 pi->vnr = be16_to_cpu(h->volume);
1109 pi->cmd = be16_to_cpu(h->command);
1110 pi->size = be32_to_cpu(h->length);
1111 } else if (header_size == sizeof(struct p_header95) &&
1112 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001114 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001115 pi->size = be32_to_cpu(h->length);
1116 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001117 } else if (header_size == sizeof(struct p_header80) &&
1118 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1119 struct p_header80 *h = header;
1120 pi->cmd = be16_to_cpu(h->command);
1121 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001122 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001123 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001124 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001125 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001126 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001127 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001128 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001129 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001130 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131}
1132
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001133static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001134{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001135 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001136 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001137
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001138 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001139 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001140 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001141
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001142 err = decode_header(connection, buffer, pi);
1143 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001145 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146}
1147
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001148static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149{
1150 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001151 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001152 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153
Philipp Reisnere9526582013-11-22 15:53:41 +01001154 if (connection->resource->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001155 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001156 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1157 struct drbd_device *device = peer_device->device;
1158
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001159 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001160 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001161 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001162 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001163
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001164 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001165 GFP_NOIO, NULL);
1166 if (rv) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001167 drbd_info(device, "local disk flush failed with status %d\n", rv);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001168			/* We would rather check for EOPNOTSUPP specifically,
1169			 * but that is not reliable; don't retry the flush for
1170			 * ANY non-zero return value, not just -EOPNOTSUPP. */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001171 drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001172 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001173 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001174 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001175
1176 rcu_read_lock();
1177 if (rv)
1178 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001180 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001182}
1183
1184/**
1185 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, and possibly finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001186 * @connection:	DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 * @epoch: Epoch object.
1188 * @ev: Epoch event.
1189 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001190static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 struct drbd_epoch *epoch,
1192 enum epoch_event ev)
1193{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001194 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 enum finish_epoch rv = FE_STILL_LIVE;
1197
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001198 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 do {
1200 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201
1202 epoch_size = atomic_read(&epoch->epoch_size);
1203
1204 switch (ev & ~EV_CLEANUP) {
1205 case EV_PUT:
1206 atomic_dec(&epoch->active);
1207 break;
1208 case EV_GOT_BARRIER_NR:
1209 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 break;
1211 case EV_BECAME_LAST:
1212			/* nothing to do */
1213 break;
1214 }
1215
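		/* An epoch may be finished once it has seen at least one write
		 * (epoch_size != 0), all of those writes have completed
		 * (active == 0), and its barrier number is known -- unless we
		 * are tearing things down anyway (EV_CLEANUP). */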
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 if (epoch_size != 0 &&
1217 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001218 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001220 spin_unlock(&connection->epoch_lock);
1221 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1222 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001223 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001224#if 0
1225 /* FIXME: dec unacked on connection, once we have
1226 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001227 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001228 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001229#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001231 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1233 list_del(&epoch->list);
1234 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001235 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 kfree(epoch);
1237
1238 if (rv == FE_STILL_LIVE)
1239 rv = FE_DESTROYED;
1240 } else {
1241 epoch->flags = 0;
1242 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001243 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 if (rv == FE_STILL_LIVE)
1245 rv = FE_RECYCLED;
1246 }
1247 }
1248
1249 if (!next_epoch)
1250 break;
1251
1252 epoch = next_epoch;
1253 } while (1);
1254
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001255 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257 return rv;
1258}
1259
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001260static enum write_ordering_e
1261max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1262{
1263 struct disk_conf *dc;
1264
1265 dc = rcu_dereference(bdev->disk_conf);
1266
1267 if (wo == WO_bdev_flush && !dc->disk_flushes)
1268 wo = WO_drain_io;
1269 if (wo == WO_drain_io && !dc->disk_drain)
1270 wo = WO_none;
1271
1272 return wo;
1273}
1274
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275/**
1276 * drbd_bump_write_ordering() - Fall back to another write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001277 * @resource:	DRBD resource.
 * @bdev:	backing device to consider in addition to all attached disks, may be NULL.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278 * @wo:		Write ordering method to try.
1279 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001280void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1281 enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001282{
Philipp Reisnere9526582013-11-22 15:53:41 +01001283 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001284 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001285 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001286 static char *write_ordering_str[] = {
1287 [WO_none] = "none",
1288 [WO_drain_io] = "drain",
1289 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001290 };
1291
Philipp Reisnere9526582013-11-22 15:53:41 +01001292 pwo = resource->write_ordering;
Lars Ellenberg70df7092013-12-20 11:17:02 +01001293 if (wo != WO_bdev_flush)
1294 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001295 rcu_read_lock();
Philipp Reisnere9526582013-11-22 15:53:41 +01001296 idr_for_each_entry(&resource->devices, device, vnr) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001297 if (get_ldev(device)) {
1298 wo = max_allowed_wo(device->ldev, wo);
1299 if (device->ldev == bdev)
1300 bdev = NULL;
1301 put_ldev(device);
1302 }
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001303 }
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001304
1305 if (bdev)
1306 wo = max_allowed_wo(bdev, wo);
1307
Lars Ellenberg70df7092013-12-20 11:17:02 +01001308 rcu_read_unlock();
1309
Philipp Reisnere9526582013-11-22 15:53:41 +01001310 resource->write_ordering = wo;
1311 if (pwo != resource->write_ordering || wo == WO_bdev_flush)
1312 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001313}
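/* Illustrative example: with write ordering currently at WO_bdev_flush,
 * attaching a backing device configured with disk_flushes disabled degrades
 * the whole resource to WO_drain_io, and further to WO_none if disk_drain
 * is disabled as well. */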
1314
1315/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001316 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001317 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001318 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001319 * @rw:		flag field, see bio->bi_rw
 * @fault_type:	DRBD fault injection site to attribute the submitted bios to
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001320 *
1321 * May spread the pages to multiple bios,
1322 * depending on bio_add_page restrictions.
1323 *
1324 * Returns 0 if all bios have been submitted,
1325 * -ENOMEM if we could not allocate enough bios,
1326 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1327 * single page to an empty bio (which should never happen and likely indicates
1328 * that the lower level IO stack is in some way broken). This has been observed
1329 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001330 */
1331/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001332int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001333 struct drbd_peer_request *peer_req,
1334 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001335{
1336 struct bio *bios = NULL;
1337 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001338 struct page *page = peer_req->pages;
1339 sector_t sector = peer_req->i.sector;
1340 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001341 unsigned n_bios = 0;
1342 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001343 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001344
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001345 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1346 /* wait for all pending IO completions, before we start
1347 * zeroing things out. */
1348 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1349 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1350 sector, ds >> 9, GFP_NOIO))
1351 peer_req->flags |= EE_WAS_ERROR;
1352 drbd_endio_write_sec_final(peer_req);
1353 return 0;
1354 }
1355
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001356 /* Discards don't have any payload.
1357 * But the scsi layer still expects a bio_vec it can use internally,
1358 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001359 if (peer_req->flags & EE_IS_TRIM)
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001360 nr_pages = 1;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001361
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001362 /* In most cases, we will only need one bio. But in case the lower
1363 * level restrictions happen to be different at this offset on this
1364 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001365 * request in more than one bio.
1366 *
1367	 * Plain bio_alloc is good enough here; this is not a DRBD-internally
1368	 * generated bio, but a bio allocated on behalf of the peer.
1369 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001370next_bio:
1371 bio = bio_alloc(GFP_NOIO, nr_pages);
1372 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001373 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001374 goto fail;
1375 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001376 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001377 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001378 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001379 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001380 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001381 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001382
1383 bio->bi_next = bios;
1384 bios = bio;
1385 ++n_bios;
1386
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001387 if (rw & REQ_DISCARD) {
1388 bio->bi_iter.bi_size = ds;
1389 goto submit;
1390 }
1391
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001392 page_chain_for_each(page) {
1393 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1394 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001395 /* A single page must always be possible!
1396 * But in case it fails anyways,
1397 * we deal with it, and complain (below). */
1398 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001399 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001400 "bio_add_page failed for len=%u, "
1401 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001402 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001403 err = -ENOSPC;
1404 goto fail;
1405 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001406 goto next_bio;
1407 }
1408 ds -= len;
1409 sector += len >> 9;
1410 --nr_pages;
1411 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001412 D_ASSERT(device, ds == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001413submit:
1414 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001415
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001416 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001417 do {
1418 bio = bios;
1419 bios = bios->bi_next;
1420 bio->bi_next = NULL;
1421
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001422 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001423 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001424 return 0;
1425
1426fail:
1427 while (bios) {
1428 bio = bios;
1429 bios = bios->bi_next;
1430 bio_put(bio);
1431 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001432 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001433}
1434
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001435static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001436 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001437{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001438 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001439
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001440 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001441 drbd_clear_interval(i);
1442
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001443 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001444 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001445 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001446}
1447
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001448static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001449{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001450 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001451 int vnr;
1452
1453 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001454 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1455 struct drbd_device *device = peer_device->device;
1456
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001457 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001458 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001459 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001460 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001461 rcu_read_lock();
1462 }
1463 rcu_read_unlock();
1464}
1465
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001466static struct drbd_peer_device *
1467conn_peer_device(struct drbd_connection *connection, int volume_number)
1468{
1469 return idr_find(&connection->peer_devices, volume_number);
1470}
1471
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001472static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001474 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001475 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001476 struct drbd_epoch *epoch;
1477
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001478 /* FIXME these are unacked on connection,
1479 * not a specific (peer)device.
1480 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001481 connection->current_epoch->barrier_nr = p->barrier;
1482 connection->current_epoch->connection = connection;
1483 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484
1485 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1486 * the activity log, which means it would not be resynced in case the
1487 * R_PRIMARY crashes now.
1488 * Therefore we must send the barrier_ack after the barrier request was
1489 * completed. */
Philipp Reisnere9526582013-11-22 15:53:41 +01001490 switch (connection->resource->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001491 case WO_none:
1492 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001493 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001494
1495 /* receiver context, in the writeout path of the other node.
1496 * avoid potential distributed deadlock */
1497 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1498 if (epoch)
1499 break;
1500 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001501 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001502 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503
1504 case WO_bdev_flush:
1505 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001506 conn_wait_active_ee_empty(connection);
1507 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001508
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001509 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001510 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1511 if (epoch)
1512 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513 }
1514
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001515 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001516 default:
Philipp Reisnere9526582013-11-22 15:53:41 +01001517		drbd_err(connection, "Strangeness in resource->write_ordering %d\n",
1518 connection->resource->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001519 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001520 }
1521
1522 epoch->flags = 0;
1523 atomic_set(&epoch->epoch_size, 0);
1524 atomic_set(&epoch->active, 0);
1525
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001526 spin_lock(&connection->epoch_lock);
1527 if (atomic_read(&connection->current_epoch->epoch_size)) {
1528 list_add(&epoch->list, &connection->current_epoch->list);
1529 connection->current_epoch = epoch;
1530 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001531 } else {
1532 /* The current_epoch got recycled while we allocated this one... */
1533 kfree(epoch);
1534 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001535 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001537 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001538}
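/* Epoch lifecycle sketch: each P_BARRIER from the peer closes the current
 * write epoch and (usually) opens a new one; once every write of the closed
 * epoch has reached stable storage -- subject to the write ordering method
 * chosen above -- drbd_may_finish_epoch() emits the matching P_BARRIER_ACK. */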
1539
1540/* used from receive_RSDataReply (recv_resync_read)
1541 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001542static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001543read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001544 struct packet_info *pi) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001545{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001546 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001547 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001548 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001550 int dgs, ds, err;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001551 int data_size = pi->size;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001552 void *dig_in = peer_device->connection->int_dig_in;
1553 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001554 unsigned long *data;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001555 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001556
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001557 dgs = 0;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001558 if (!trim && peer_device->connection->peer_integrity_tfm) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001559 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001560 /*
1561 * FIXME: Receive the incoming digest into the receive buffer
1562 * here, together with its struct p_data?
1563 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001564 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001565 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001567 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001568 }
1569
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001570 if (trim) {
1571 D_ASSERT(peer_device, data_size == 0);
1572 data_size = be32_to_cpu(trim->size);
1573 }
1574
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001575 if (!expect(IS_ALIGNED(data_size, 512)))
1576 return NULL;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001577 /* prepare for larger trim requests. */
1578 if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001579 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580
Lars Ellenberg66660322010-04-06 12:15:04 +02001581	/* even though we trust our peer,
1582 * we sometimes have to double check. */
1583 if (sector + (data_size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001584 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001585 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001586 (unsigned long long)capacity,
1587 (unsigned long long)sector, data_size);
1588 return NULL;
1589 }
1590
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1592 * "criss-cross" setup, that might cause write-out on some other DRBD,
1593 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001594 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001595 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001597
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001598 if (trim)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001599 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001600
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001602 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001603 page_chain_for_each(page) {
1604 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001605 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001606 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001607 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001608 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001609 data[0] = data[0] ^ (unsigned long)-1;
1610 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001611 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001612 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001613 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001614 return NULL;
1615 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001616 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001617 }
1618
1619 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001620 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001622 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001623 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001624 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001625 return NULL;
1626 }
1627 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001628 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001629 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001630}
1631
1632/* drbd_drain_block() just takes a data block
1633 * out of the socket input buffer, and discards it.
1634 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001635static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636{
1637 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001638 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001639 void *data;
1640
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001641 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001642 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001643
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001644 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645
1646 data = kmap(page);
1647 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001648 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1649
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001650 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001651 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001653 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001654 }
1655 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001656 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001657 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001658}
1659
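/* recv_dless_read(): handle the payload of a "disk-less" read reply, i.e.
 * a read that the peer served on our behalf; the data is copied straight
 * into the pages of the original master bio. */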
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001660static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661 sector_t sector, int data_size)
1662{
Kent Overstreet79886132013-11-23 17:19:00 -08001663 struct bio_vec bvec;
1664 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001666 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001667 void *dig_in = peer_device->connection->int_dig_in;
1668 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001669
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001670 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001671 if (peer_device->connection->peer_integrity_tfm) {
1672 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1673 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001674 if (err)
1675 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001676 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677 }
1678
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679 /* optimistically update recv_cnt. if receiving fails below,
1680 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001681 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001682
1683 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001684 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001685
Kent Overstreet79886132013-11-23 17:19:00 -08001686 bio_for_each_segment(bvec, bio, iter) {
1687 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1688 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001689 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001690 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001691 if (err)
1692 return err;
1693 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001694 }
1695
1696 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001697 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001698 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001699 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001700 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001701 }
1702 }
1703
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001704 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001705 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706}
1707
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001708/*
1709 * e_end_resync_block() is called in asender context via
1710 * drbd_finish_peer_reqs().
1711 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001712static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001714 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001715 container_of(w, struct drbd_peer_request, w);
1716 struct drbd_peer_device *peer_device = peer_req->peer_device;
1717 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001718 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001719 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001720
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001721 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001723 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001724 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001725 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726 } else {
1727 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001728 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001730 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001732 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001734 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001735}
1736
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001737static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001738 struct packet_info *pi) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001739{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001740 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001741 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001742
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001743 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001744 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001745 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001747 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001748
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001749 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001750 /* corresponding dec_unacked() in e_end_resync_block()
1751 * respective _drbd_clear_done_ee */
1752
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001753 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001754
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001755 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001756 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001757 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001759 atomic_add(pi->size >> 9, &device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001760 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001761 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001762
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001763 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001764 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001765 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001766 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001767 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001768
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001769 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001770fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001771 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001772 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001773}
1774
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001775static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001776find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001777 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001778{
1779 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001780
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001781 /* Request object according to our peer */
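	/* The block_id we put on the wire is simply the kernel address of
	 * the request object; drbd_contains_interval() verifies below that
	 * it really is a known, still-local request covering this sector
	 * before it is trusted any further. */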
1782 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001783 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001784 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001785 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001786 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001787 (unsigned long)id, (unsigned long long)sector);
1788 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001789 return NULL;
1790}
1791
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001792static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001794 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001795 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796 struct drbd_request *req;
1797 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001798 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001799 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001800
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001801 peer_device = conn_peer_device(connection, pi->vnr);
1802 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001803 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001804 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805
1806 sector = be64_to_cpu(p->sector);
1807
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001808 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001809 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001810 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001811 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001812 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001813
Bart Van Assche24c48302011-05-21 18:32:29 +02001814 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815 * special casing it there for the various failure cases.
1816 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001817 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001818 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001819 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001820 /* else: nothing. handled from drbd_disconnect...
1821 * I don't think we may complete this just yet
1822 * in case we are "on-disconnect: freeze" */
1823
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001824 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825}
1826
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001827static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001828{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001829 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001830 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001831 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001832 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001833 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001834
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001835 peer_device = conn_peer_device(connection, pi->vnr);
1836 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001837 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001838 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001839
1840 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001841 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001842
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001843 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001844 /* data is submitted to disk within recv_resync_read.
1845 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001846 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001847 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001848 } else {
1849 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001850 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001851
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001852 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001853
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001854 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001855 }
1856
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001857 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001858
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001859 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001860}
1861
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001862static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001863 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001864{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001865 struct drbd_interval *i;
1866 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001867
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001868 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001869 if (!i->local)
1870 continue;
1871 req = container_of(i, struct drbd_request, i);
1872 if (req->rq_state & RQ_LOCAL_PENDING ||
1873 !(req->rq_state & RQ_POSTPONED))
1874 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001875 /* as it is RQ_POSTPONED, this will cause it to
1876 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001877 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001878 }
1879}
1880
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001881/*
1882 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001883 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001884static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001885{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001886 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001887 container_of(w, struct drbd_peer_request, w);
1888 struct drbd_peer_device *peer_device = peer_req->peer_device;
1889 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001890 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001891 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001892
Philipp Reisner303d1442011-04-13 16:24:47 -07001893 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001894 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001895 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1896 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001897 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001898 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001899 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001900 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001901 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001903 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001904 /* we expect it to be marked out of sync anyways...
1905 * maybe assert this? */
1906 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001907 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001908 }
1909 /* we delete from the conflict detection hash _after_ we sent out the
1910 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001911 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001912 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001913 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001914 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001915 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001916 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001917 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001918 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001919 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001920
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001921 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001922
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001923 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924}
1925
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001926static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001927{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001928 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001929 container_of(w, struct drbd_peer_request, w);
1930 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001931 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001932
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001933 err = drbd_send_ack(peer_device, ack, peer_req);
1934 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001935
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001936 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001937}
1938
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001939static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001940{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001941 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001942}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001943
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001944static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001945{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001946 struct drbd_peer_request *peer_req =
1947 container_of(w, struct drbd_peer_request, w);
1948 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001949
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001950 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001951 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001952}
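/* Conflict replies: peers speaking protocol 100 or newer understand
 * P_RETRY_WRITE; for older peers we fall back to P_SUPERSEDED, as the
 * version check above shows. */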
1953
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001954static bool seq_greater(u32 a, u32 b)
1955{
1956 /*
1957 * We assume 32-bit wrap-around here.
1958 * For 24-bit wrap-around, we would have to shift:
1959 * a <<= 8; b <<= 8;
1960 */
1961 return (s32)a - (s32)b > 0;
1962}
1963
1964static u32 seq_max(u32 a, u32 b)
1965{
1966 return seq_greater(a, b) ? a : b;
1967}
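/* Wrap-around example: seq_greater(1, 0xffffffff) is true, since
 * (s32)1 - (s32)0xffffffff == 1 - (-1) == 2 > 0; a sequence number that
 * just wrapped still compares as newer than one from before the wrap. */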
1968
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001969static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001970{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001971 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001972 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001973
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001974 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001975 spin_lock(&device->peer_seq_lock);
1976 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1977 device->peer_seq = newest_peer_seq;
1978 spin_unlock(&device->peer_seq_lock);
1979 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001980 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001981 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001982 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001983}
1984
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001985static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1986{
1987 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1988}
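/*
 * Example (l1 and l2 are byte counts, sectors are 512 bytes):
 * overlaps(0, 4096, 8, 4096) is false, the intervals cover sectors
 * [0, 8) and [8, 16); overlaps(0, 4096, 7, 512) is true, since
 * sector 7 lies in both.
 */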
1989
1990/* maybe change sync_ee into an interval tree as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001991static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001992{
1993 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001994	bool rv = false;
1995
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001996 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001997 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001998 if (overlaps(peer_req->i.sector, peer_req->i.size,
1999 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002000			rv = true;
2001 break;
2002 }
2003 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002004 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002005
2006 return rv;
2007}
2008
Philipp Reisnerb411b362009-09-25 16:07:19 -07002009/* Called from receive_Data.
2010 * Synchronize packets on sock with packets on msock.
2011 *
2012 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2013 * packet traveling on msock, they are still processed in the order they have
2014 * been sent.
2015 *
2016 * Note: we don't care for Ack packets overtaking P_DATA packets.
2017 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002018 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002020 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 * ourselves. Correctly handles 32bit wrap around.
2022 *
2023 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2024 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2025 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2026 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2027 *
2028 * returns 0 if we may process the packet,
2029 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002030static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002032 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002033 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002034 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002035 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002036
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002037 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002038 return 0;
2039
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002040 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002041 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002042 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2043 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002044 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002045 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002046
Philipp Reisnerb411b362009-09-25 16:07:19 -07002047 if (signal_pending(current)) {
2048 ret = -ERESTARTSYS;
2049 break;
2050 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002051
2052 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002053 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002054 rcu_read_unlock();
2055
2056 if (!tp)
2057 break;
2058
2059 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002060 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2061 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002062 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002063 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002064 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002065 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002066 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002067 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002068 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002069 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002070 break;
2071 }
2072 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002073 spin_unlock(&device->peer_seq_lock);
2074 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075 return ret;
2076}
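/*
 * Example of the ordering logic above (illustrative numbers): with
 * device->peer_seq == 7, a P_DATA packet carrying peer_seq 8 passes
 * immediately (seq_greater(7, 7) is false) and bumps peer_seq to 8;
 * a packet carrying peer_seq 10 means packet 9 is still in flight on
 * the msock, so we sleep on seq_wait until it arrives or we time out.
 */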
2077
Lars Ellenberg688593c2010-11-17 22:25:03 +01002078/* see also bio_flags_to_wire()
2079 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2080 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002081static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002082{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002083 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2084 (dpf & DP_FUA ? REQ_FUA : 0) |
2085 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2086 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002087}
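/*
 * For example, a write sent with DP_FUA | DP_FLUSH maps to
 * REQ_FUA | REQ_FLUSH locally, and DP_DISCARD becomes REQ_DISCARD.
 * The DP_* values are fixed on the wire while the REQ_* values may
 * differ between kernel versions, hence the explicit translation.
 */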
2088
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002089static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002090 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002091{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002092 struct drbd_interval *i;
2093
2094 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002095 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002096 struct drbd_request *req;
2097 struct bio_and_error m;
2098
2099 if (!i->local)
2100 continue;
2101 req = container_of(i, struct drbd_request, i);
2102 if (!(req->rq_state & RQ_POSTPONED))
2103 continue;
2104 req->rq_state &= ~RQ_POSTPONED;
2105 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002106 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002107 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002108 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002109 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002110 goto repeat;
2111 }
2112}
2113
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002114static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002115 struct drbd_peer_request *peer_req)
2116{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002117 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002118 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002119 sector_t sector = peer_req->i.sector;
2120 const unsigned int size = peer_req->i.size;
2121 struct drbd_interval *i;
2122 bool equal;
2123 int err;
2124
2125 /*
2126 * Inserting the peer request into the write_requests tree will prevent
2127 * new conflicting local requests from being added.
2128 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002129 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002130
2131 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002132 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002133 if (i == &peer_req->i)
2134 continue;
2135
2136 if (!i->local) {
2137 /*
2138 * Our peer has sent a conflicting remote request; this
2139 * should not happen in a two-node setup. Wait for the
2140 * earlier peer request to complete.
2141 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002142 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002143 if (err)
2144 goto out;
2145 goto repeat;
2146 }
2147
2148 equal = i->sector == sector && i->size == size;
2149 if (resolve_conflicts) {
2150 /*
2151 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002152 * overlapping request, it can be considered overwritten
2153 * and thus superseded; otherwise, it will be retried
2154 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002155 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002156 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002157 (i->size >> 9) >= sector + (size >> 9);
2158
2159 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002160 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002161 "local=%llus +%u, remote=%llus +%u, "
2162 "assuming %s came first\n",
2163 (unsigned long long)i->sector, i->size,
2164 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002165 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002166
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002167 inc_unacked(device);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002168 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002169 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002170 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002171 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002172
2173 err = -ENOENT;
2174 goto out;
2175 } else {
2176 struct drbd_request *req =
2177 container_of(i, struct drbd_request, i);
2178
2179 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002180 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002181 "local=%llus +%u, remote=%llus +%u\n",
2182 (unsigned long long)i->sector, i->size,
2183 (unsigned long long)sector, size);
2184
2185 if (req->rq_state & RQ_LOCAL_PENDING ||
2186 !(req->rq_state & RQ_POSTPONED)) {
2187 /*
2188 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002189 * decide if this request has been superseded
2190 * or needs to be retried.
2191 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002192 * disappear from the write_requests tree.
2193 *
2194 * In addition, wait for the conflicting
2195 * request to finish locally before submitting
2196 * the conflicting peer request.
2197 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002198 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002199 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002200 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002201 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002202 goto out;
2203 }
2204 goto repeat;
2205 }
2206 /*
2207 * Remember to restart the conflicting requests after
2208 * the new peer request has completed.
2209 */
2210 peer_req->flags |= EE_RESTART_REQUESTS;
2211 }
2212 }
2213 err = 0;
2214
2215 out:
2216 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002217 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002218 return err;
2219}
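/*
 * The containment test above, by example: a local request covering
 * sectors [0, 16) fully contains a peer write of sectors [4, 8), so
 * with RESOLVE_CONFLICTS set that peer write is superseded and only
 * acked; a peer write of sectors [12, 20) sticks out of the local
 * interval and is answered with P_RETRY_WRITE instead (protocol 100
 * and up).
 */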
2220
Philipp Reisnerb411b362009-09-25 16:07:19 -07002221/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002222static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002224 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002225 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002226 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002227 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002228 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002229 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002230 int rw = WRITE;
2231 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002232 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002233
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002234 peer_device = conn_peer_device(connection, pi->vnr);
2235 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002236 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002237 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002238
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002239 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002240 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002241
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002242 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2243 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002244 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002245 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002246 if (!err)
2247 err = err2;
2248 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002249 }
2250
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002251 /*
2252 * Corresponding put_ldev done either below (on various errors), or in
2253 * drbd_peer_request_endio, if we successfully submit the data at the
2254 * end of this function.
2255 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002256
2257 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002258 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002259 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002260 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002261 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002262 }
2263
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002264 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002265
Lars Ellenberg688593c2010-11-17 22:25:03 +01002266 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002267 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002268 if (pi->cmd == P_TRIM) {
2269 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2270 peer_req->flags |= EE_IS_TRIM;
2271 if (!blk_queue_discard(q))
2272 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2273 D_ASSERT(peer_device, peer_req->i.size > 0);
2274 D_ASSERT(peer_device, rw & REQ_DISCARD);
2275 D_ASSERT(peer_device, peer_req->pages == NULL);
2276 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002277 D_ASSERT(device, peer_req->i.size == 0);
2278 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002279 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002280
2281 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002282 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002283
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002284 spin_lock(&connection->epoch_lock);
2285 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002286 atomic_inc(&peer_req->epoch->epoch_size);
2287 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002288 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002289
Philipp Reisner302bdea2011-04-21 11:36:49 +02002290 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002291 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002292 rcu_read_unlock();
2293 if (tp) {
2294 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002295 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002296 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002297 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002298 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002299 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002300 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002301 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002302 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002303 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002304 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002305 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002306 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002307 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002308 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002309 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002310 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002311 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002312	/* if we use the zeroout fallback code, we process synchronously
2313	 * and wait for all pending requests (i.e. for active_ee to become
2314	 * empty) in drbd_submit_peer_request();
2315	 * better not add ourselves here. */
2316 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2317 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002318 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002319
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002320 if (device->state.conn == C_SYNC_TARGET)
2321 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002322
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002323 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002324 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002325 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002326 case DRBD_PROT_C:
2327 dp_flags |= DP_SEND_WRITE_ACK;
2328 break;
2329 case DRBD_PROT_B:
2330 dp_flags |= DP_SEND_RECEIVE_ACK;
2331 break;
2332 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002333 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002334 }
2335
2336 if (dp_flags & DP_SEND_WRITE_ACK) {
2337 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002338 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002339 /* corresponding dec_unacked() in e_end_block()
2340 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002341 }
2342
2343 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002344 /* I really don't like it that the receiver thread
2345 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002346 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002347 }
2348
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002349 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002350		/* In case we have the only disk of the cluster, cover this write by the activity log and mark it out of sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002351 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002352 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2353 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002354 drbd_al_begin_io(device, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002355 }
2356
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002357 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002358 if (!err)
2359 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002360
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002361 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002362 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002363 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002364 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002365 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002366 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002367 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002368 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002369
Philipp Reisnerb411b362009-09-25 16:07:19 -07002370out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002371 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002372 put_ldev(device);
2373 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002374 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002375}
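/*
 * The ack mode selected above mirrors the replication protocol:
 * protocol C expects P_WRITE_ACK once the write is stable on our
 * disk, protocol B a P_RECV_ACK as soon as the data was received,
 * and protocol A no per-write ack at all. From protocol version 100
 * on, the sender encodes this choice directly in dp_flags instead of
 * both sides deriving it from net_conf.
 */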
2376
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002377/* We may throttle resync if the lower device seems to be busy
2378 * and the current sync rate is above c_min_rate.
2379 *
2380 * To decide whether or not the lower device is busy, we use a scheme similar
2381 * to MD RAID's is_mddev_idle(): if the partition stats reveal a "significant"
2382 * amount (more than 64 sectors) of activity we cannot account for with our
2383 * own resync activity, it obviously is "busy".
2384 *
2385 * The sync rate used here is based on only the most recent two step marks,
2386 * giving a short-time average so we can react faster.
2387 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002388bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2389{
2390 struct lc_element *tmp;
2391 bool throttle = true;
2392
2393 if (!drbd_rs_c_min_rate_throttle(device))
2394 return false;
2395
2396 spin_lock_irq(&device->al_lock);
2397 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2398 if (tmp) {
2399 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2400 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2401 throttle = false;
2402 /* Do not slow down if app IO is already waiting for this extent */
2403 }
2404 spin_unlock_irq(&device->al_lock);
2405
2406 return throttle;
2407}
2408
2409bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002410{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002411 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002412 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002413 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002414 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002415
2416 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002417 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002418 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002419
2420 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002421 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002422 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002423
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002424 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2425 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002426 atomic_read(&device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002427 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002428 unsigned long rs_left;
2429 int i;
2430
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002431 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002432
2433 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2434 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002435 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002436
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002437 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2438 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002439 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002440 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002441
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002442 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002443 if (!dt)
2444 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002445 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002446 dbdt = Bit2KB(db/dt);
2447
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002448 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002449 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002450 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002451 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002452}
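/*
 * Numeric sketch of the rate check above (illustrative values,
 * assuming the usual 4 KiB per bitmap bit): if the last two sync
 * marks show db = 1024 bits cleared in dt = 2 seconds, then
 * dbdt = Bit2KB(1024 / 2) = 2048 KiB/s. With c_min_rate = 250 KiB/s
 * (a common setting), and provided the backing device also shows more
 * than 64 sectors of activity we cannot attribute to the resync
 * itself, the resync exceeds the configured floor and we report that
 * it should be throttled.
 */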
2453
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002454static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002455{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002456 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002457 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002458 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002459 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002460 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002461 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002462 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002463 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002464 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002465
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002466 peer_device = conn_peer_device(connection, pi->vnr);
2467 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002468 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002469 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002470 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002471
2472 sector = be64_to_cpu(p->sector);
2473 size = be32_to_cpu(p->blksize);
2474
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002475 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002476 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002477 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002478 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002479 }
2480 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002481 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002482 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002483 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002484 }
2485
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002486 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002487 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002488 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002489 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002490 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002491 break;
2492 case P_RS_DATA_REQUEST:
2493 case P_CSUM_RS_REQUEST:
2494 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002495			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002496 break;
2497 case P_OV_REPLY:
2498 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002499 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002500 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002501 break;
2502 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002503 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002504 }
2505 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002506 drbd_err(device, "Can not satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002507 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002508
Lars Ellenberga821cc42010-09-06 12:31:37 +02002509	/* drain the payload, if any */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002510 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002511 }
2512
2513 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2514 * "criss-cross" setup, that might cause write-out on some other DRBD,
2515 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002516 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2517 true /* has real payload */, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002518 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002519 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002520 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002521 }
2522
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002523 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002524 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002525 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002526 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002527 /* application IO, don't drbd_rs_begin_io */
2528 goto submit;
2529
Philipp Reisnerb411b362009-09-25 16:07:19 -07002530 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002531 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002532 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002533 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002534 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002535 break;
2536
2537 case P_OV_REPLY:
2538 case P_CSUM_RS_REQUEST:
2539 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002540 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002541 if (!di)
2542 goto out_free_e;
2543
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002544 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002545 di->digest = (((char *)di)+sizeof(struct digest_info));
2546
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002547 peer_req->digest = di;
2548 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002549
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002550 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002551 goto out_free_e;
2552
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002553 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002554 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002555 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002556 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002557 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01002558 /* remember to report stats in drbd_resync_finished */
2559 device->use_csums = true;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002560 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002561 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002562 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002563 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002564 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002565 /* drbd_rs_begin_io done when we sent this request,
2566 * but accounting still needs to be done. */
2567 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002568 }
2569 break;
2570
2571 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002572 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002573 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002574 unsigned long now = jiffies;
2575 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002576 device->ov_start_sector = sector;
2577 device->ov_position = sector;
2578 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2579 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002580 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002581 device->rs_mark_left[i] = device->ov_left;
2582 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002583 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002584 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002585 (unsigned long long)sector);
2586 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002587 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002588 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002589 break;
2590
Philipp Reisnerb411b362009-09-25 16:07:19 -07002591 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002592 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002593 }
2594
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002595 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2596 * wrt the receiver, but it is not as straightforward as it may seem.
2597 * Various places in the resync start and stop logic assume resync
2598	 * requests are processed in order; requeuing this on the worker thread
2599 * introduces a bunch of new code for synchronization between threads.
2600 *
2601 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2602 * "forever", throttling after drbd_rs_begin_io will lock that extent
2603 * for application writes for the same time. For now, just throttle
2604 * here, where the rest of the code expects the receiver to sleep for
2605 * a while, anyways.
2606 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002607
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002608 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2609 * this defers syncer requests for some time, before letting at least
2610	 * one request through. The resync controller on the receiving side
2611 * will adapt to the incoming rate accordingly.
2612 *
2613 * We cannot throttle here if remote is Primary/SyncTarget:
2614 * we would also throttle its application reads.
2615 * In that case, throttling is done on the SyncTarget only.
2616 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002617 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002618 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002619 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002620 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002621
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002622submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002623 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002624
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002625submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002626 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002627 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002628 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002629 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002630
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002631 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002632 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002633
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002634 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002635 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002636 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002637 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002638 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002639 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2640
Philipp Reisnerb411b362009-09-25 16:07:19 -07002641out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002642 put_ldev(device);
2643 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002644 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002645}
2646
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002647/**
2648 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2649 */
2650static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002651{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002652 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653 int self, peer, rv = -100;
2654 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002655 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002656
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002657 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2658 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002659
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002660 ch_peer = device->p_uuid[UI_SIZE];
2661 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002662
Philipp Reisner44ed1672011-04-19 17:10:19 +02002663 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002664 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002665 rcu_read_unlock();
2666 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002667 case ASB_CONSENSUS:
2668 case ASB_DISCARD_SECONDARY:
2669 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002670 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002671 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002672 break;
2673 case ASB_DISCONNECT:
2674 break;
2675 case ASB_DISCARD_YOUNGER_PRI:
2676 if (self == 0 && peer == 1) {
2677 rv = -1;
2678 break;
2679 }
2680 if (self == 1 && peer == 0) {
2681 rv = 1;
2682 break;
2683 }
2684 /* Else fall through to one of the other strategies... */
2685 case ASB_DISCARD_OLDER_PRI:
2686 if (self == 0 && peer == 1) {
2687 rv = 1;
2688 break;
2689 }
2690 if (self == 1 && peer == 0) {
2691 rv = -1;
2692 break;
2693 }
2694 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002695 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002696 "Using discard-least-changes instead\n");
2697 case ASB_DISCARD_ZERO_CHG:
2698 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002699 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002700 ? -1 : 1;
2701 break;
2702 } else {
2703 if (ch_peer == 0) { rv = 1; break; }
2704 if (ch_self == 0) { rv = -1; break; }
2705 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002706 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002707 break;
2708 case ASB_DISCARD_LEAST_CHG:
2709 if (ch_self < ch_peer)
2710 rv = -1;
2711 else if (ch_self > ch_peer)
2712 rv = 1;
2713 else /* ( ch_self == ch_peer ) */
2714 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002715 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002716 ? -1 : 1;
2717 break;
2718 case ASB_DISCARD_LOCAL:
2719 rv = -1;
2720 break;
2721 case ASB_DISCARD_REMOTE:
2722 rv = 1;
2723 }
2724
2725 return rv;
2726}
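/*
 * Illustration of the discard-least-changes arm (made-up counts):
 * with ch_self = 10 and ch_peer = 1000 the function returns -1, so
 * this node becomes C_SYNC_TARGET and its few changes are
 * overwritten; equal counts fall back to the RESOLVE_CONFLICTS
 * tie-breaker.
 */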
2727
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002728/**
2729 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2730 */
2731static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002732{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002733 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002734 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002735 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002736
Philipp Reisner44ed1672011-04-19 17:10:19 +02002737 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002738 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002739 rcu_read_unlock();
2740 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741 case ASB_DISCARD_YOUNGER_PRI:
2742 case ASB_DISCARD_OLDER_PRI:
2743 case ASB_DISCARD_LEAST_CHG:
2744 case ASB_DISCARD_LOCAL:
2745 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002746 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002747 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002748 break;
2749 case ASB_DISCONNECT:
2750 break;
2751 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002752 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002753 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002754 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002755 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 rv = hg;
2757 break;
2758 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002759 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002760 break;
2761 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002762 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002764 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002765 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002766 enum drbd_state_rv rv2;
2767
Philipp Reisnerb411b362009-09-25 16:07:19 -07002768 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2769 * we might be here in C_WF_REPORT_PARAMS which is transient.
2770 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002771 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002772 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002773 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002774 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002775 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 rv = hg;
2777 }
2778 } else
2779 rv = hg;
2780 }
2781
2782 return rv;
2783}
2784
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002785/**
2786 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2787 */
2788static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002789{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002790 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002791 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002792 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002793
Philipp Reisner44ed1672011-04-19 17:10:19 +02002794 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002795 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002796 rcu_read_unlock();
2797 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002798 case ASB_DISCARD_YOUNGER_PRI:
2799 case ASB_DISCARD_OLDER_PRI:
2800 case ASB_DISCARD_LEAST_CHG:
2801 case ASB_DISCARD_LOCAL:
2802 case ASB_DISCARD_REMOTE:
2803 case ASB_CONSENSUS:
2804 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002805 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002806 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002807 break;
2808 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002809 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810 break;
2811 case ASB_DISCONNECT:
2812 break;
2813 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002814 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002816 enum drbd_state_rv rv2;
2817
Philipp Reisnerb411b362009-09-25 16:07:19 -07002818 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2819 * we might be here in C_WF_REPORT_PARAMS which is transient.
2820 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002821 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002822 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002823 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002824 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002825 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002826 rv = hg;
2827 }
2828 } else
2829 rv = hg;
2830 }
2831
2832 return rv;
2833}
2834
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002835static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836 u64 bits, u64 flags)
2837{
2838 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002839 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002840 return;
2841 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002842 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843 text,
2844 (unsigned long long)uuid[UI_CURRENT],
2845 (unsigned long long)uuid[UI_BITMAP],
2846 (unsigned long long)uuid[UI_HISTORY_START],
2847 (unsigned long long)uuid[UI_HISTORY_END],
2848 (unsigned long long)bits,
2849 (unsigned long long)flags);
2850}
2851
2852/*
2853 100 after split brain try auto recover
2854 2 C_SYNC_SOURCE set BitMap
2855 1 C_SYNC_SOURCE use BitMap
2856 0 no Sync
2857 -1 C_SYNC_TARGET use BitMap
2858 -2 C_SYNC_TARGET set BitMap
2859 -100 after split brain, disconnect
2860-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002861-1091 requires proto 91
2862-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002863 */
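/*
 * Two examples of the table above: a pair of freshly created nodes
 * both still carry UUID_JUST_CREATED, match on rule 10 and return 0,
 * so no initial sync happens. A node with zeroed UUIDs connecting to
 * an established peer matches rule 20 and returns -2, requesting a
 * full sync (C_SYNC_TARGET with the bitmap set).
 */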
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002864static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002866 struct drbd_peer_device *const peer_device = first_peer_device(device);
2867 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868 u64 self, peer;
2869 int i, j;
2870
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002871 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2872 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873
2874 *rule_nr = 10;
2875 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2876 return 0;
2877
2878 *rule_nr = 20;
2879 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2880 peer != UUID_JUST_CREATED)
2881 return -2;
2882
2883 *rule_nr = 30;
2884 if (self != UUID_JUST_CREATED &&
2885 (peer == UUID_JUST_CREATED || peer == (u64)0))
2886 return 2;
2887
2888 if (self == peer) {
2889 int rct, dc; /* roles at crash time */
2890
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002891 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002893 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002894 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002896 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2897 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002898 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002899 drbd_uuid_move_history(device);
2900 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2901 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002902
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002903 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2904 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002905 *rule_nr = 34;
2906 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002907 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002908 *rule_nr = 36;
2909 }
2910
2911 return 1;
2912 }
2913
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002914 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002915
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002916 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002917 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002918
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002919 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2920 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002921 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002922
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002923 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2924 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2925 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002926
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002927 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002928 *rule_nr = 35;
2929 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002930 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002931 *rule_nr = 37;
2932 }
2933
2934 return -1;
2935 }
2936
2937 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002938 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2939 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002940 /* lowest bit is set when we were primary,
2941 * next bit (weight 2) is set when peer was primary */
2942 *rule_nr = 40;
2943
2944 switch (rct) {
2945 case 0: /* !self_pri && !peer_pri */ return 0;
2946 case 1: /* self_pri && !peer_pri */ return 1;
2947 case 2: /* !self_pri && peer_pri */ return -1;
2948 case 3: /* self_pri && peer_pri */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002949 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002950 return dc ? -1 : 1;
2951 }
2952 }
2953
2954 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002955 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002956 if (self == peer)
2957 return -1;
2958
2959 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002960 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002961 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002962 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002963 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2964 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2965 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002966 /* The last P_SYNC_UUID did not get through. Undo the modifications
2967 the peer made to its UUIDs when it last started a resync as sync source. */
2968
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002969 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002970 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002971
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002972 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2973 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002974
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002975 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002976 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002977
Philipp Reisnerb411b362009-09-25 16:07:19 -07002978 return -1;
2979 }
2980 }
2981
2982 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002983 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002985 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986 if (self == peer)
2987 return -2;
2988 }
2989
2990 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002991 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2992 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002993 if (self == peer)
2994 return 1;
2995
2996 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002997 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002998 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002999 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003000 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3001 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3002 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003003 /* The last P_SYNC_UUID did not get through. Undo the modifications
3004 we made to our UUIDs when we last started a resync as sync source. */
3005
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003006 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003007 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003009 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3010 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003012 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003013 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3014 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003015
3016 return 1;
3017 }
3018 }
3019
3020
3021 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003022 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003023 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003024 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 if (self == peer)
3026 return 2;
3027 }
3028
3029 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003030 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3031 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003032 if (self == peer && self != ((u64)0))
3033 return 100;
3034
3035 *rule_nr = 100;
3036 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003037 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003038 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003039 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003040 if (self == peer)
3041 return -100;
3042 }
3043 }
3044
3045 return -1000;
3046}
3047
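/*
 * Illustrative sketch, not part of the original source: every UUID
 * comparison in drbd_uuid_compare() masks off bit 0 with "& ~((u64)1)"
 * before testing for equality, so two UUIDs that differ only in their
 * lowest (flag) bit still count as the same data generation.  The
 * helper name demo_uuid_equal is hypothetical.
 */
#if 0	/* standalone demonstration only */
static int demo_uuid_equal(u64 a, u64 b)
{
	/* bit 0 is a flag bit, not part of the UUID's identity */
	return (a & ~(u64)1) == (b & ~(u64)1);
}
/* demo_uuid_equal(0x1234, 0x1235) -> 1, demo_uuid_equal(0x1234, 0x1236) -> 0 */
#endif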
3048/* drbd_sync_handshake() returns the new conn state on success, or
3049 C_MASK on failure.
3050 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003051static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3052 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053 enum drbd_disk_state peer_disk) __must_hold(local)
3054{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003055 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003056 enum drbd_conns rv = C_MASK;
3057 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003058 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003059 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003060
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003061 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003063 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003064
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003065 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003066
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003067 spin_lock_irq(&device->ldev->md.uuid_lock);
3068 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3069 drbd_uuid_dump(device, "peer", device->p_uuid,
3070 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003071
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003072 hg = drbd_uuid_compare(device, &rule_nr);
3073 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003074
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003075 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003076
3077 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003078 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003079 return C_MASK;
3080 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003081 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003082 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 return C_MASK;
3084 }
3085
3086 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3087 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3088 int f = (hg == -100) || abs(hg) == 2;
3089 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3090 if (f)
3091 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003092 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093 hg > 0 ? "source" : "target");
3094 }
3095
Adam Gandelman3a11a482010-04-08 16:48:23 -07003096 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003097 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003098
Philipp Reisner44ed1672011-04-19 17:10:19 +02003099 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003100 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003101
3102 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003103 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003104 + (peer_role == R_PRIMARY);
3105 int forced = (hg == -100);
3106
3107 switch (pcount) {
3108 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003109 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110 break;
3111 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003112 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003113 break;
3114 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003115 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003116 break;
3117 }
3118 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003119 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 "automatically solved. Sync from %s node\n",
3121 pcount, (hg < 0) ? "peer" : "this");
3122 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003123 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003124 " UUIDs were ambiguous.\n");
3125 hg = hg*2;
3126 }
3127 }
3128 }
3129
3130 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003131 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003132 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003133 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003134 hg = 1;
3135
3136 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003137 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003138 "Sync from %s node\n",
3139 (hg < 0) ? "peer" : "this");
3140 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003141 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003142 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003143 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144
3145 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003146 /* FIXME this log message is not correct if we end up here
3147 * after an attempted attach on a diskless node.
3148 * We just refuse to attach -- well, we drop the "connection"
3149 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003150 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003151 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003152 return C_MASK;
3153 }
3154
3155 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003156 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003157 return C_MASK;
3158 }
3159
3160 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003161 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003162 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003163 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003164 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165 /* fall through */
3166 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003167 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003168 return C_MASK;
3169 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003170 drbd_warn(device, "Becoming SyncTarget, violating the stable-data "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003171 "assumption\n");
3172 }
3173 }
3174
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003175 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003176 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003177 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003178 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003179 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003180 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3181 abs(hg) >= 2 ? "full" : "bit-map based");
3182 return C_MASK;
3183 }
3184
Philipp Reisnerb411b362009-09-25 16:07:19 -07003185 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003186 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003187 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003188 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003189 return C_MASK;
3190 }
3191
3192 if (hg > 0) { /* become sync source. */
3193 rv = C_WF_BITMAP_S;
3194 } else if (hg < 0) { /* become sync target */
3195 rv = C_WF_BITMAP_T;
3196 } else {
3197 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003198 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003199 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003200 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003201 }
3202 }
3203
3204 return rv;
3205}
3206
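/*
 * Illustrative sketch, not part of the original source: a hypothetical
 * helper summarizing how the handshake code above interprets the value
 * returned by drbd_uuid_compare() (sign = resync direction, magnitude
 * >= 2 = full sync, +/-100 = split-brain, -1000 and below = fatal).
 */
#if 0	/* standalone demonstration only */
static const char *demo_describe_hg(int hg)
{
	if (hg == -1000)
		return "unrelated data, drop connection";
	if (hg < -1000)
		return "peer protocol too old to resolve, drop connection";
	if (hg == 100 || hg == -100)
		return "split-brain, apply recovery policies";
	if (hg == 0)
		return "in sync, become Connected";
	if (hg > 0)
		return hg >= 2 ? "sync source, full sync"
			       : "sync source, bitmap-based resync";
	return hg <= -2 ? "sync target, full sync"
			: "sync target, bitmap-based resync";
}
#endif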
Philipp Reisnerf179d762011-05-16 17:31:47 +02003207static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003208{
3209 /* ASB_DISCARD_REMOTE on one side paired with ASB_DISCARD_LOCAL on the other is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003210 if (peer == ASB_DISCARD_REMOTE)
3211 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212
3213 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003214 if (peer == ASB_DISCARD_LOCAL)
3215 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003216
3217 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003218 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003219}
3220
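/*
 * Illustrative usage, not part of the original source: receive_protocol()
 * below compares the peer's after-split-brain policy only after mapping
 * it into our frame of reference, since "discard remote" on one node is
 * "discard local" on the other.  demo_after_sb_compatible is a
 * hypothetical name.
 */
#if 0	/* standalone demonstration only */
static int demo_after_sb_compatible(enum drbd_after_sb_p mine,
				    enum drbd_after_sb_p peers)
{
	/* e.g. mine == ASB_DISCARD_LOCAL matches peers == ASB_DISCARD_REMOTE */
	return convert_after_sb(peers) == mine;
}
#endif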
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003221static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003223 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003224 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3225 int p_proto, p_discard_my_data, p_two_primaries, cf;
3226 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3227 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003228 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003229 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003230
Philipp Reisnerb411b362009-09-25 16:07:19 -07003231 p_proto = be32_to_cpu(p->protocol);
3232 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3233 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3234 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003235 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003236 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003237 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003238
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003239 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003240 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003241
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003242 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003243 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003244 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003245 if (err)
3246 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003247 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248 }
3249
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003250 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003251 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003252
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003253 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003254 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003255
3256 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003257 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003258
3259 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003260 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003261 goto disconnect_rcu_unlock;
3262 }
3263
3264 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003265 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003266 goto disconnect_rcu_unlock;
3267 }
3268
3269 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003270 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003271 goto disconnect_rcu_unlock;
3272 }
3273
3274 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003275 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003276 goto disconnect_rcu_unlock;
3277 }
3278
3279 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003280 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003281 goto disconnect_rcu_unlock;
3282 }
3283
3284 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003285 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003286 goto disconnect_rcu_unlock;
3287 }
3288
3289 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003290 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003291 goto disconnect_rcu_unlock;
3292 }
3293
3294 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003295 }
3296
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003297 if (integrity_alg[0]) {
3298 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003300 /*
3301 * We can only change the peer data integrity algorithm
3302 * here. Changing our own data integrity algorithm
3303 * requires that we send a P_PROTOCOL_UPDATE packet at
3304 * the same time; otherwise, the peer has no way to
3305 * tell between which packets the algorithm should
3306 * change.
3307 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003308
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003309 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3310 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003311 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003312 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003313 goto disconnect;
3314 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003315
3316 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3317 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3318 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3319 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003320 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003321 goto disconnect;
3322 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323 }
3324
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003325 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3326 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003327 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003328 goto disconnect;
3329 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003330
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003331 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003332 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003333 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003334 *new_net_conf = *old_net_conf;
3335
3336 new_net_conf->wire_protocol = p_proto;
3337 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3338 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3339 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3340 new_net_conf->two_primaries = p_two_primaries;
3341
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003342 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003343 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003344 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003345
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003346 crypto_free_hash(connection->peer_integrity_tfm);
3347 kfree(connection->int_dig_in);
3348 kfree(connection->int_dig_vv);
3349 connection->peer_integrity_tfm = peer_integrity_tfm;
3350 connection->int_dig_in = int_dig_in;
3351 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003352
3353 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003354 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003355 integrity_alg[0] ? integrity_alg : "(none)");
3356
3357 synchronize_rcu();
3358 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003359 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003360
Philipp Reisner44ed1672011-04-19 17:10:19 +02003361disconnect_rcu_unlock:
3362 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003364 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003365 kfree(int_dig_in);
3366 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003367 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003368 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003369}
3370
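/*
 * Illustrative sketch, not part of the original source: the net_conf
 * update in receive_protocol() follows the classic RCU copy/update
 * pattern.  A condensed version (demo_update_net_conf is hypothetical;
 * the real code also holds connection->data.mutex and swaps the
 * integrity buffers):
 */
#if 0	/* standalone demonstration only */
static int demo_update_net_conf(struct drbd_connection *connection, int proto)
{
	struct net_conf *new_conf, *old_conf;

	new_conf = kmalloc(sizeof(*new_conf), GFP_KERNEL);
	if (!new_conf)
		return -ENOMEM;

	mutex_lock(&connection->resource->conf_update);
	old_conf = connection->net_conf;	/* stable under conf_update */
	*new_conf = *old_conf;			/* copy ... */
	new_conf->wire_protocol = proto;	/* ... then modify */
	rcu_assign_pointer(connection->net_conf, new_conf);
	mutex_unlock(&connection->resource->conf_update);

	synchronize_rcu();	/* wait out readers of old_conf ... */
	kfree(old_conf);	/* ... before freeing it */
	return 0;
}
#endif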
3371/* helper function
3372 * input: alg name, feature name
3373 * return: NULL (alg name was "")
3374 * ERR_PTR(error) if something goes wrong
3375 * or the crypto hash ptr, if it worked out ok. */
Lars Ellenberg8ce953a2014-02-27 09:46:18 +01003376static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003377 const char *alg, const char *name)
3378{
3379 struct crypto_hash *tfm;
3380
3381 if (!alg[0])
3382 return NULL;
3383
3384 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3385 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003386 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387 alg, name, PTR_ERR(tfm));
3388 return tfm;
3389 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003390 return tfm;
3391}
3392
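/*
 * Illustrative usage, not part of the original source: callers must
 * distinguish all three return classes of the helper above.
 * demo_use_digest is a hypothetical name.
 */
#if 0	/* standalone demonstration only */
static int demo_use_digest(struct drbd_device *device, const char *alg)
{
	struct crypto_hash *tfm;

	tfm = drbd_crypto_alloc_digest_safe(device, alg, "demo-alg");
	if (tfm == NULL)
		return 0;		/* alg was "": feature disabled */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* allocation failed, already logged */
	/* ... use tfm ... */
	crypto_free_hash(tfm);
	return 0;
}
#endif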
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003393static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003394{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003395 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003396 int size = pi->size;
3397
3398 while (size) {
3399 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003400 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003401 if (s <= 0) {
3402 if (s < 0)
3403 return s;
3404 break;
3405 }
3406 size -= s;
3407 }
3408 if (size)
3409 return -EIO;
3410 return 0;
3411}
3412
3413/*
3414 * config_unknown_volume - device configuration command for unknown volume
3415 *
3416 * When a device is added to an existing connection, the node on which the
3417 * device is added first will send configuration commands to its peer but the
3418 * peer will not know about the device yet. It will warn and ignore these
3419 * commands. Once the device is added on the second node, the second node will
3420 * send the same device configuration commands, but in the other direction.
3421 *
3422 * (We can also end up here if drbd is misconfigured.)
3423 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003424static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003425{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003426 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003427 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003428 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003429}
3430
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003431static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003432{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003433 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003434 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003435 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003436 unsigned int header_size, data_size, exp_max_sz;
3437 struct crypto_hash *verify_tfm = NULL;
3438 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003439 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003440 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003441 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003442 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003443 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003444 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003445
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003446 peer_device = conn_peer_device(connection, pi->vnr);
3447 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003448 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003449 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003450
3451 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3452 : apv == 88 ? sizeof(struct p_rs_param)
3453 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003454 : apv <= 94 ? sizeof(struct p_rs_param_89)
3455 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003456
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003457 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003458 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003459 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003460 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003461 }
3462
3463 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003464 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003465 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003466 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003467 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003468 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003469 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003470 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003471 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003472 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003473 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003474 }
3475
3476 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003477 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003478 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3479
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003480 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003481 if (err)
3482 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003483
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003484 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003485 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003486 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003487 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3488 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003489 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003490 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003491 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003492 return -ENOMEM;
3493 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003494
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003495 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003496 *new_disk_conf = *old_disk_conf;
3497
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003498 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003499 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003500
3501 if (apv >= 88) {
3502 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003503 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003504 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003505 "peer wants %u, accepting only up to %u bytes\n",
3506 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003507 err = -EIO;
3508 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003509 }
3510
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003511 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003512 if (err)
3513 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514 /* we expect NUL terminated string */
3515 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003516 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003517 p->verify_alg[data_size-1] = 0;
3518
3519 } else /* apv >= 89 */ {
3520 /* we still expect NUL terminated strings */
3521 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003522 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3523 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003524 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3525 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3526 }
3527
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003528 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003529 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003530 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003531 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003532 goto disconnect;
3533 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003534 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003535 p->verify_alg, "verify-alg");
3536 if (IS_ERR(verify_tfm)) {
3537 verify_tfm = NULL;
3538 goto disconnect;
3539 }
3540 }
3541
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003542 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003543 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003544 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003545 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546 goto disconnect;
3547 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003548 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003549 p->csums_alg, "csums-alg");
3550 if (IS_ERR(csums_tfm)) {
3551 csums_tfm = NULL;
3552 goto disconnect;
3553 }
3554 }
3555
Philipp Reisner813472c2011-05-03 16:47:02 +02003556 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003557 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3558 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3559 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3560 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003561
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003562 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003563 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003564 new_plan = fifo_alloc(fifo_size);
3565 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003566 drbd_err(device, "kmalloc of fifo_buffer failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003567 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003568 goto disconnect;
3569 }
3570 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003571 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003573 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003574 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3575 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003576 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003577 goto disconnect;
3578 }
3579
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003580 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003581
3582 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003583 strcpy(new_net_conf->verify_alg, p->verify_alg);
3584 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003585 crypto_free_hash(peer_device->connection->verify_tfm);
3586 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003587 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003588 }
3589 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003590 strcpy(new_net_conf->csums_alg, p->csums_alg);
3591 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003592 crypto_free_hash(peer_device->connection->csums_tfm);
3593 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003594 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003595 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003596 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598 }
3599
Philipp Reisner813472c2011-05-03 16:47:02 +02003600 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003601 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3602 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003603 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003604
3605 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003606 old_plan = device->rs_plan_s;
3607 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003608 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003609
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003610 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003611 synchronize_rcu();
3612 if (new_net_conf)
3613 kfree(old_net_conf);
3614 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003615 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003616
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003617 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003618
Philipp Reisner813472c2011-05-03 16:47:02 +02003619reconnect:
3620 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003621 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003622 kfree(new_disk_conf);
3623 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003624 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003625 return -EIO;
3626
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003628 kfree(new_plan);
3629 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003630 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003631 kfree(new_disk_conf);
3632 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003633 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003634 /* just for completeness: actually not needed,
3635 * as this is not reached if csums_tfm was ok. */
3636 crypto_free_hash(csums_tfm);
3637 /* but free the verify_tfm again, if csums_tfm did not work out */
3638 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003639 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003640 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641}
3642
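/*
 * Worked example, not part of the original source, for the plan-ahead
 * fifo sizing in receive_SyncParam() above, assuming SLEEP_TIME is
 * HZ/10 (the resync controller runs every 100ms):
 *
 *	c_plan_ahead = 20 (configured in 0.1s units, i.e. 2.0s)
 *	fifo_size = (20 * 10 * (HZ/10)) / HZ = 20 slots
 *
 * i.e. one fifo slot per controller tick inside the planning window,
 * which is why the fifo is reallocated whenever the computed size
 * differs from the current plan's size.
 */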
Philipp Reisnerb411b362009-09-25 16:07:19 -07003643/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003644static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645 const char *s, sector_t a, sector_t b)
3646{
3647 sector_t d;
3648 if (a == 0 || b == 0)
3649 return;
3650 d = (a > b) ? (a - b) : (b - a);
3651 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003652 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653 (unsigned long long)a, (unsigned long long)b);
3654}
3655
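/*
 * Worked example, not part of the original source: "differ by more
 * than 12.5%" is implemented as d > a>>3, i.e. d > a/8.  For instance:
 *
 *	a = 1000, b = 1100: d = 100, a>>3 = 125, b>>3 = 137 -> silent
 *	a = 1000, b = 1200: d = 200, a>>3 = 125             -> warns
 */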
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003656static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003657{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003658 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003659 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003660 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003661 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003662 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003664 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003665
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003666 peer_device = conn_peer_device(connection, pi->vnr);
3667 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003668 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003669 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003670
Philipp Reisnerb411b362009-09-25 16:07:19 -07003671 p_size = be64_to_cpu(p->d_size);
3672 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003673 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003674
Philipp Reisnerb411b362009-09-25 16:07:19 -07003675 /* just store the peer's disk size for now.
3676 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003677 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003678
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003679 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003680 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003681 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003682 rcu_read_unlock();
3683
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003684 warn_if_differ_considerably(device, "lower level device sizes",
3685 p_size, drbd_get_max_capacity(device->ldev));
3686 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003687 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003688
3689 /* if this is the first connect, or an otherwise expected
3690 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003691 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003692 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003693
3694 /* Never shrink a device with usable data during connect.
3695 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003696 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3697 drbd_get_capacity(device->this_bdev) &&
3698 device->state.disk >= D_OUTDATED &&
3699 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003700 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003701 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003702 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003703 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003704 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003705
3706 if (my_usize != p_usize) {
3707 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3708
3709 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3710 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003711 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003712 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003713 return -ENOMEM;
3714 }
3715
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003716 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003717 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003718 *new_disk_conf = *old_disk_conf;
3719 new_disk_conf->disk_size = p_usize;
3720
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003721 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003722 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003723 synchronize_rcu();
3724 kfree(old_disk_conf);
3725
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003726 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003727 (unsigned long)p_usize);
3728 }
3729
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003730 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003732
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003733 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003734 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3735 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3736 drbd_reconsider_max_bio_size(), we can be sure that after
3737 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3738
Philipp Reisnere89b5912010-03-24 17:11:33 +01003739 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003740 if (get_ldev(device)) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003741 drbd_reconsider_max_bio_size(device, device->ldev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003742 dd = drbd_determine_dev_size(device, ddsf, NULL);
3743 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003744 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003745 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003746 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003747 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003748 /*
3749 * I am diskless, need to accept the peer's *current* size.
3750 * I must NOT accept the peer's backing disk size,
3751 * it may have been larger than mine all along...
3752 *
3753 * At this point, the peer knows more about my disk, or at
3754 * least about what we last agreed upon, than I do.
3755 * So if his c_size is less than his d_size, the most likely
3756 * reason is that *my* d_size was smaller last time we checked.
3757 *
3758 * However, if he sends a zero current size,
3759 * take his (user-capped or) backing disk size anyway.
3760 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003761 drbd_reconsider_max_bio_size(device, NULL);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003762 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003763 }
3764
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003765 if (get_ldev(device)) {
3766 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3767 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003768 ldsc = 1;
3769 }
3770
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003771 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772 }
3773
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003774 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003775 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003776 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003777 /* we have different sizes, probably peer
3778 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003779 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003781 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3782 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3783 if (device->state.pdsk >= D_INCONSISTENT &&
3784 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003785 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003786 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003787 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003788 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003789 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003790 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003791 }
3792 }
3793
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003794 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795}
3796
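/*
 * Illustrative sketch, not part of the original source: the diskless
 * branch above relies on the GNU "x ?: y" extension (x if non-zero,
 * else y).  Expanded, the capacity fallback chain reads as follows
 * (demo_pick_capacity is a hypothetical name):
 */
#if 0	/* standalone demonstration only */
static sector_t demo_pick_capacity(sector_t p_csize, sector_t p_usize,
				   sector_t p_size)
{
	if (p_csize)
		return p_csize;	/* peer's current device size */
	if (p_usize)
		return p_usize;	/* peer's user-requested size */
	return p_size;		/* peer's backing disk size */
}
#endif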
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003797static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003799 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003800 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003801 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003803 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003804
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003805 peer_device = conn_peer_device(connection, pi->vnr);
3806 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003807 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003808 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003809
Philipp Reisnerb411b362009-09-25 16:07:19 -07003810 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003811 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003812 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003813 return false;
3814 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003815
3816 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3817 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3818
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003819 kfree(device->p_uuid);
3820 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003822 if (device->state.conn < C_CONNECTED &&
3823 device->state.disk < D_INCONSISTENT &&
3824 device->state.role == R_PRIMARY &&
3825 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003826 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003827 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003828 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003829 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003830 }
3831
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003832 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003833 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003834 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003835 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003836 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003837 (p_uuid[UI_FLAGS] & 8);
3838 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003839 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003840 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003841 "clear_n_write from receive_uuids",
3842 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003843 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3844 _drbd_uuid_set(device, UI_BITMAP, 0);
3845 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003847 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003848 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003850 put_ldev(device);
3851 } else if (device->state.disk < D_INCONSISTENT &&
3852 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003853 /* I am a diskless primary, the peer just created a new current UUID
3854 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003855 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003856 }
3857
3858 /* Before we test for the disk state, we should wait until a possibly
3859 ongoing cluster-wide state change has finished. That is important if
3860 we are primary and are detaching from our disk. We need to see the
3861 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003862 mutex_lock(device->state_mutex);
3863 mutex_unlock(device->state_mutex);
3864 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3865 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003866
3867 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003868 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003869
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003870 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871}
3872
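/*
 * Illustrative sketch, not part of the original source: the bare
 * lock/unlock pair on device->state_mutex above is a barrier, not a
 * critical section; it only blocks until an in-flight cluster-wide
 * state change has released the mutex.  Generic form of the idiom
 * (demo_wait_for_holder is a hypothetical name):
 */
#if 0	/* standalone demonstration only */
static void demo_wait_for_holder(struct mutex *m)
{
	mutex_lock(m);		/* blocks while a state change is running */
	mutex_unlock(m);	/* the lock itself was never needed */
}
#endif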
3873/**
3874 * convert_state() - Converts the peer's view of the cluster state to our point of view
3875 * @ps: The state as seen by the peer.
3876 */
3877static union drbd_state convert_state(union drbd_state ps)
3878{
3879 union drbd_state ms;
3880
3881 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003882 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003883 [C_CONNECTED] = C_CONNECTED,
3884
3885 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3886 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3887 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3888 [C_VERIFY_S] = C_VERIFY_T,
3889 [C_MASK] = C_MASK,
3890 };
3891
3892 ms.i = ps.i;
3893
3894 ms.conn = c_tab[ps.conn];
3895 ms.peer = ps.role;
3896 ms.role = ps.peer;
3897 ms.pdsk = ps.disk;
3898 ms.disk = ps.pdsk;
3899 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3900
3901 return ms;
3902}
3903
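/*
 * Worked example, not part of the original source: the peer reports
 * "I am Primary, my disk is UpToDate, I am starting a sync as source".
 * Seen from this node, roles and disks swap and source becomes target:
 *
 *	ps = { .role = R_PRIMARY, .disk = D_UP_TO_DATE,
 *	       .conn = C_STARTING_SYNC_S }
 *	convert_state(ps) yields
 *	     { .peer = R_PRIMARY, .pdsk = D_UP_TO_DATE,
 *	       .conn = C_STARTING_SYNC_T }
 */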
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003904static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003905{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003906 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003907 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003908 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003909 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003910 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003912 peer_device = conn_peer_device(connection, pi->vnr);
3913 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003914 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003915 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003916
Philipp Reisnerb411b362009-09-25 16:07:19 -07003917 mask.i = be32_to_cpu(p->mask);
3918 val.i = be32_to_cpu(p->val);
3919
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003920 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003921 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003922 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003923 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924 }
3925
3926 mask = convert_state(mask);
3927 val = convert_state(val);
3928
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003929 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003930 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003931
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003932 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003933
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003934 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003935}
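/*
 * The mask/val pair in a state change request encodes a partial state
 * update: bits set in mask select the fields of the state word to
 * change, val carries their new values. A sketch of the convention
 * (assuming the usual new = (old & ~mask) | val rule applied by the
 * state machine; hypothetical helper name):
 */
static union drbd_state __maybe_unused
apply_masked_state(union drbd_state os, union drbd_state mask, union drbd_state val)
{
	union drbd_state ns;

	/* fields outside mask keep their old value */
	ns.i = (os.i & ~mask.i) | val.i;
	return ns;
}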
3936
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003937static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003939 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003940 union drbd_state mask, val;
3941 enum drbd_state_rv rv;
3942
3943 mask.i = be32_to_cpu(p->mask);
3944 val.i = be32_to_cpu(p->val);
3945
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003946 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3947 mutex_is_locked(&connection->cstate_mutex)) {
3948 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003949 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003950 }
3951
3952 mask = convert_state(mask);
3953 val = convert_state(val);
3954
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003955 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3956 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003957
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003958 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003959}
3960
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003961static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003962{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003963 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003964 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003965 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003966 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003967 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003968 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003969 int rv;
3970
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003971 peer_device = conn_peer_device(connection, pi->vnr);
3972 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003973 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003974 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003975
Philipp Reisnerb411b362009-09-25 16:07:19 -07003976 peer_state.i = be32_to_cpu(p->state);
3977
3978 real_peer_disk = peer_state.disk;
3979 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003980 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003981 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003982 }
3983
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003984 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003985 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003986 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003987 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003988
Lars Ellenberg545752d2011-12-05 14:39:25 +01003989 /* If some other part of the code (asender thread, timeout)
3990 * already decided to close the connection again,
3991 * we must not "re-establish" it here. */
3992 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003993 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003994
Lars Ellenberg40424e42011-09-26 15:24:56 +02003995 /* If this is the "end of sync" confirmation, usually the peer disk
3996 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3997 * set) resync started in PausedSyncT, or if the timing of pause-/
3998 * unpause-sync events has been "just right", the peer disk may
3999 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
4000 */
4001 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
4002 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004003 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4004 /* If we are (becoming) SyncSource, but peer is still in sync
4005 * preparation, ignore its uptodate-ness to avoid flapping, it
4006 * will change to inconsistent once the peer reaches active
4007 * syncing states.
4008 * It may have changed syncer-paused flags, however, so we
4009 * cannot ignore this completely. */
4010 if (peer_state.conn > C_CONNECTED &&
4011 peer_state.conn < C_SYNC_SOURCE)
4012 real_peer_disk = D_INCONSISTENT;
4013
4014 /* if peer_state changes to connected at the same time,
4015 * it explicitly notifies us that it finished resync.
4016 * Maybe we should finish it up, too? */
4017 else if (os.conn >= C_SYNC_SOURCE &&
4018 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004019 if (drbd_bm_total_weight(device) <= device->rs_failed)
4020 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004021 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004022 }
4023 }
4024
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004025 /* explicit verify finished notification, stop sector reached. */
4026 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4027 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004028 ov_out_of_sync_print(device);
4029 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004030 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004031 }
4032
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004033	/* peer says its disk is inconsistent, while we think it is uptodate,
4034	 * and this happens while the peer still thinks we have a sync going on,
4035	 * but we think we are already done with the sync.
4036	 * We ignore this to avoid flapping pdsk.
4037	 * This should not happen if the peer is a recent version of drbd. */
4038 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4039 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4040 real_peer_disk = D_UP_TO_DATE;
4041
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004042 if (ns.conn == C_WF_REPORT_PARAMS)
4043 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044
Philipp Reisner67531712010-10-27 12:21:30 +02004045 if (peer_state.conn == C_AHEAD)
4046 ns.conn = C_BEHIND;
4047
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004048 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4049 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050 int cr; /* consider resync */
4051
4052 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004053 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004054 /* if we had an established connection
4055 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004056 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004057 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004058 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004059 /* if we have both been inconsistent, and the peer has been
4060 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004061 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004062 /* if we had been plain connected, and the admin requested to
4063 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004064 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004065 (peer_state.conn >= C_STARTING_SYNC_S &&
4066 peer_state.conn <= C_WF_BITMAP_T));
4067
4068 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004069 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004071 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004072 if (ns.conn == C_MASK) {
4073 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004074 if (device->state.disk == D_NEGOTIATING) {
4075 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004076 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004077 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004078 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004079 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004080 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004081 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004082 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004083 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004084 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004085 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086 }
4087 }
4088 }
4089
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004090 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004091 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004093 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004094 ns.peer = peer_state.role;
4095 ns.pdsk = real_peer_disk;
4096 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004097 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004098 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004099 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004100 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4101 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004102 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004103	   for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004104 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004105 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004106 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004107 drbd_uuid_new_current(device);
4108 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004109 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004110 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004111 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004112 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4113 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004114 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004115
4116 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004117 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004118 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004119 }
4120
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004121 if (os.conn > C_WF_REPORT_PARAMS) {
4122 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004123 peer_state.disk != D_NEGOTIATING ) {
4124 /* we want resync, peer has not yet decided to sync... */
4125 /* Nowadays only used when forcing a node into primary role and
4126 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004127 drbd_send_uuids(peer_device);
4128 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129 }
4130 }
4131
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004132 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004133
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004134 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004135
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004136 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004137}
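/*
 * The retry label in receive_state() above is an optimistic
 * read-compute-validate loop: sample the state under the request lock,
 * drop the lock to compute the new state (drbd_sync_handshake() may
 * sleep), then re-take the lock and start over if the state word
 * changed in the meantime. Condensed sketch of the pattern
 * (hypothetical helper, commit step elided):
 */
static void __maybe_unused optimistic_state_update(struct drbd_device *device)
{
	union drbd_state os, ns;

	spin_lock_irq(&device->resource->req_lock);
retry:
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* ... compute ns from os and the peer's view; may sleep ... */

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;	/* lost a race, resample */
	/* ... commit ns while still holding the lock ... */
	spin_unlock_irq(&device->resource->req_lock);
}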
4138
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004139static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004140{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004141 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004142 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004143 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004144
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004145 peer_device = conn_peer_device(connection, pi->vnr);
4146 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004147 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004148 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004149
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004150 wait_event(device->misc_wait,
4151 device->state.conn == C_WF_SYNC_UUID ||
4152 device->state.conn == C_BEHIND ||
4153 device->state.conn < C_CONNECTED ||
4154 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004155
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004156 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004157
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158 /* Here the _drbd_uuid_ functions are right, current should
4159 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004160 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4161 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4162 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004163
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004164 drbd_print_uuids(device, "updated sync uuid");
4165 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004167 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004168 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004169 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004170
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004171 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004172}
4173
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004174/**
4175 * receive_bitmap_plain
4176 *
4177 * Return 0 when done, 1 when another iteration is needed, and a negative error
4178 * code upon failure.
4179 */
4180static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004181receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004182 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004183{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004184 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004185 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004186 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004187 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004188 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004189 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004190
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004191 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004192 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004193 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004194 }
4195 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004196 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004197 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004198 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004199 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004200
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004201 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004202
4203 c->word_offset += num_words;
4204 c->bit_offset = c->word_offset * BITS_PER_LONG;
4205 if (c->bit_offset > c->bm_bits)
4206 c->bit_offset = c->bm_bits;
4207
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004208 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004209}
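/*
 * Sizing example for receive_bitmap_plain() (illustrative numbers:
 * assuming DRBD_SOCKET_BUFFER_SIZE == 4096 and a 16 byte header on a
 * 64 bit host): data_size = 4080, so each packet carries at most
 * 4080 / 8 = 510 longs, i.e. want = 4080 bytes. Every packet but the
 * last must be completely full, hence the strict want != size check.
 */
static unsigned int __maybe_unused
bm_plain_words_per_packet(unsigned int buf_size, unsigned int header_size)
{
	return (buf_size - header_size) / sizeof(unsigned long);
}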
4210
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004211static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4212{
4213 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4214}
4215
4216static int dcbp_get_start(struct p_compressed_bm *p)
4217{
4218 return (p->encoding & 0x80) != 0;
4219}
4220
4221static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4222{
4223 return (p->encoding >> 4) & 0x7;
4224}
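/*
 * Layout of the p_compressed_bm encoding byte, as implied by the three
 * accessors above:
 *
 *	bit  7		start toggle - whether the first run describes
 *			set bits
 *	bits 6..4	number of pad bits at the end of the bitstream
 *	bits 3..0	bitmap code, e.g. RLE_VLI_Bits
 *
 * Worked example: encoding == 0xa5 means start toggle set (0x80),
 * pad bits (0xa5 >> 4) & 7 == 2, code 0xa5 & 0x0f == 5.
 */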
4225
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004226/**
4227 * recv_bm_rle_bits
4228 *
4229 * Return 0 when done, 1 when another iteration is needed, and a negative error
4230 * code upon failure.
4231 */
4232static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004233recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004235 struct bm_xfer_ctx *c,
4236 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237{
4238 struct bitstream bs;
4239 u64 look_ahead;
4240 u64 rl;
4241 u64 tmp;
4242 unsigned long s = c->bit_offset;
4243 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004244 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004245 int have;
4246 int bits;
4247
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004248 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249
4250 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4251 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004252 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004253
4254 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4255 bits = vli_decode_bits(&rl, look_ahead);
4256 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004257 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004258
4259 if (toggle) {
4260 e = s + rl -1;
4261 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004262 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004263 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004265 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266 }
4267
4268 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004269 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004270 have, bits, look_ahead,
4271 (unsigned int)(bs.cur.b - p->code),
4272 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004273 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004275 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4276 if (likely(bits < 64))
4277 look_ahead >>= bits;
4278 else
4279 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004280 have -= bits;
4281
4282 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4283 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004284 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285 look_ahead |= tmp << have;
4286 have += bits;
4287 }
4288
4289 c->bit_offset = s;
4290 bm_xfer_ctx_bit_to_word_offset(c);
4291
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004292 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293}
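/*
 * Decode example for the loop above (illustrative): with a start
 * toggle of 0 and decoded run lengths 5, 3, 4 the target bitmap is
 * touched as follows:
 *
 *	bits 0..4	skipped  (toggle == 0, rl == 5)
 *	bits 5..7	set      (toggle == 1, _drbd_bm_set_bits(s = 5, e = 7))
 *	bits 8..11	skipped  (toggle == 0, rl == 4)
 *
 * Only runs of set bits are applied; runs of cleared bits merely
 * advance s. The look_ahead/have pair maintains a sliding 64 bit
 * window over the VLI bitstream, so a run length never has to straddle
 * a refill boundary.
 */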
4294
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004295/**
4296 * decode_bitmap_c
4297 *
4298 * Return 0 when done, 1 when another iteration is needed, and a negative error
4299 * code upon failure.
4300 */
4301static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004302decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004303 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004304 struct bm_xfer_ctx *c,
4305 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004306{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004307 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004308 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004309
4310 /* other variants had been implemented for evaluation,
4311 * but have been dropped as this one turned out to be "best"
4312 * during all our tests. */
4313
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004314 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4315 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004316 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004317}
4318
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004319void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004320 const char *direction, struct bm_xfer_ctx *c)
4321{
4322 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004323 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004324 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4325 unsigned int plain =
4326 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4327 c->bm_words * sizeof(unsigned long);
4328 unsigned int total = c->bytes[0] + c->bytes[1];
4329 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330
4331	/* total cannot be zero, but just in case: */
4332 if (total == 0)
4333 return;
4334
4335 /* don't report if not compressed */
4336 if (total >= plain)
4337 return;
4338
4339 /* total < plain. check for overflow, still */
4340 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4341 : (1000 * total / plain);
4342
4343 if (r > 1000)
4344 r = 1000;
4345
4346 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004347 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 "total %u; compression: %u.%u%%\n",
4349 direction,
4350 c->bytes[1], c->packets[1],
4351 c->bytes[0], c->packets[0],
4352 total, r/10, r % 10);
4353}
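/*
 * Worked example for the permille arithmetic above (illustrative
 * numbers): suppose plain == 32768 bytes and the RLE transfer took
 * total == 1024 bytes. Then
 *
 *	r = 1000 * 1024 / 32768 = 31	(31.25, truncated)
 *	r = 1000 - 31 = 969		-> printed as "compression: 96.9%"
 *
 * The total > UINT_MAX/1000 branch only exists to keep the 32 bit
 * multiplication from overflowing for very large bitmaps.
 */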
4354
4355/* Since we are processing the bitfield from lower addresses to higher,
4356   it does not matter whether we process it in 32 bit chunks or 64 bit
4357   chunks as long as it is little endian. (Understand it as a byte stream,
4358   beginning with the lowest byte...) If we used big endian,
4359   we would need to process it from the highest address to the lowest
4360   in order to be agnostic to the 32 vs 64 bit issue.
4361
4362 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004363static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004365 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004366 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004368 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004369
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004370 peer_device = conn_peer_device(connection, pi->vnr);
4371 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004372 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004373 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004375 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004376 /* you are supposed to send additional out-of-sync information
4377 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004378
Philipp Reisnerb411b362009-09-25 16:07:19 -07004379 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004380 .bm_bits = drbd_bm_bits(device),
4381 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004382 };
4383
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004384 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004385 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004386 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004387 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004388 /* MAYBE: sanity check that we speak proto >= 90,
4389 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004390 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004392 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004393 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004394 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004395 goto out;
4396 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004397 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004398 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004399 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004400 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004401 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004402 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004403 if (err)
4404 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004405 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004407 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004408 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409 goto out;
4410 }
4411
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004412 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004413 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004414
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004415 if (err <= 0) {
4416 if (err < 0)
4417 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004419 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004420 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004421 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004423 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004424
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004425 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004427 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004428 enum drbd_state_rv rv;
4429
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004430 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004431 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432 goto out;
4433 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004434 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004435 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004436 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004437 /* admin may have requested C_DISCONNECTING,
4438 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004439 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004440 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004441 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004442 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443
Philipp Reisnerb411b362009-09-25 16:07:19 -07004444 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004445 drbd_bm_unlock(device);
4446 if (!err && device->state.conn == C_WF_BITMAP_S)
4447 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004448 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004449}
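/*
 * Exchange flow implied by the tail of receive_bitmap() above
 * (sketch): the WF_BITMAP_T side merges the peer's bitmap, answers
 * with its own via drbd_send_bitmap() and requests WF_SYNC_UUID; the
 * WF_BITMAP_S side instead starts the resync as SyncSource once its
 * receive completed without error.
 */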
4450
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004451static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004452{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004453 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004454 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004455
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004456 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004457}
4458
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004459static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004460{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004461 /* Make sure we've acked all the TCP data associated
4462 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004463 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004464
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004465 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004466}
4467
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004468static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004469{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004470 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004471 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004472 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004473
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004474 peer_device = conn_peer_device(connection, pi->vnr);
4475 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004476 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004477 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004478
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004479 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004480 case C_WF_SYNC_UUID:
4481 case C_WF_BITMAP_T:
4482 case C_BEHIND:
4483 break;
4484 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004485 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004486 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004487 }
4488
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004489 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004490
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004491 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004492}
4493
Philipp Reisner02918be2010-08-20 14:35:10 +02004494struct data_cmd {
4495 int expect_payload;
4496 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004497 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004498};
4499
Philipp Reisner02918be2010-08-20 14:35:10 +02004500static struct data_cmd drbd_cmd_handler[] = {
4501 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4502 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4503 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4504 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004505 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4506 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4507 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004508 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4509 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004510 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4511 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004512 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4513 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4514 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4515 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4516 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4517 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4518 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4519 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4520 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4521 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004522 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004523 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004524 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004525 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004526};
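/*
 * Contract of the handler table (sketch): drbdd() below reads pkt_size
 * bytes of sub-header into pi->data before calling fn; payload beyond
 * that is only legal if expect_payload is set, and fn must consume the
 * remaining pi->size bytes itself. A hypothetical new entry would look
 * like
 *
 *	[P_EXAMPLE] = { 0, sizeof(struct p_example), receive_example },
 *
 * where P_EXAMPLE, p_example and receive_example are made-up names.
 */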
4527
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004528static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004529{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004530 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004531 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004532 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004534 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004535 struct data_cmd *cmd;
4536
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004537 drbd_thread_current_set_cpu(&connection->receiver);
4538 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004539 goto err_out;
4540
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004541 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004542 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004543 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004544 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004545 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004546 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004547
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004548 shs = cmd->pkt_size;
4549 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004550 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004551 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004552 goto err_out;
4553 }
4554
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004555 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004556 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004557 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004558 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004559 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004560 }
4561
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004562 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004563 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004564 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004565 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004566 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004567 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004568 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004569 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004570
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004571 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004572 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004573}
4574
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004575static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004576{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004577 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004578 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004579 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004580
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004581 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583
Lars Ellenberg545752d2011-12-05 14:39:25 +01004584 /* We are about to start the cleanup after connection loss.
4585 * Make sure drbd_make_request knows about that.
4586 * Usually we should be in some network failure state already,
4587 * but just in case we are not, we fix it up here.
4588 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004589 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004590
Philipp Reisnerb411b362009-09-25 16:07:19 -07004591 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004592 drbd_thread_stop(&connection->asender);
4593 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004594
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004595 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004596 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4597 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004598 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004599 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004600 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004601 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004602 rcu_read_lock();
4603 }
4604 rcu_read_unlock();
4605
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004606 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004607 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004608 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004609 atomic_set(&connection->current_epoch->epoch_size, 0);
4610 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004611
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004612 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004613
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004614 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4615 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004616
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004617 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004618 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004619 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004620 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004621
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004622 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004623
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004624 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004625 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004626}
4627
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004628static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004629{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004630 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004631 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004632
Philipp Reisner85719572010-07-21 10:20:17 +02004633 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004634 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004635 _drbd_wait_ee_list_empty(device, &device->active_ee);
4636 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4637 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004638 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004639
4640 /* We do not have data structures that would allow us to
4641 * get the rs_pending_cnt down to 0 again.
4642 * * On C_SYNC_TARGET we do not have any data structures describing
4643 * the pending RSDataRequest's we have sent.
4644 * * On C_SYNC_SOURCE there is no data structure that tracks
4645 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4646 * And no, it is not the sum of the reference counts in the
4647 * resync_LRU. The resync_LRU tracks the whole operation including
4648 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4649 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004650 drbd_rs_cancel_all(device);
4651 device->rs_total = 0;
4652 device->rs_failed = 0;
4653 atomic_set(&device->rs_pending_cnt, 0);
4654 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004655
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004656 del_timer_sync(&device->resync_timer);
4657 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004658
Philipp Reisnerb411b362009-09-25 16:07:19 -07004659 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4660 * w_make_resync_request etc. which may still be on the worker queue
4661 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004662 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004663
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004664 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004665
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004666	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4667	   might have queued work again. The one before drbd_finish_peer_reqs() is
4668	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004669 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004670
Lars Ellenberg08332d72012-08-17 15:09:13 +02004671 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4672 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004673 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004675 kfree(device->p_uuid);
4676 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004677
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004678 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004679 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004680
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004681 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004682
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004683 /* serialize with bitmap writeout triggered by the state change,
4684 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004685 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004686
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687 /* tcp_close and release of sendpage pages can be deferred. I don't
4688 * want to use SO_LINGER, because apparently it can be deferred for
4689 * more than 20 seconds (longest time I checked).
4690 *
4691	 * Actually we don't care exactly when the network stack does its
4692 * put_page(), but release our reference on these pages right here.
4693 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004694 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004695 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004696 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004697 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004698 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004699 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004700 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004701 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004702 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004703
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004704 D_ASSERT(device, list_empty(&device->read_ee));
4705 D_ASSERT(device, list_empty(&device->active_ee));
4706 D_ASSERT(device, list_empty(&device->sync_ee));
4707 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004708
Philipp Reisner360cc742011-02-08 14:29:53 +01004709 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004710}
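/*
 * Cleanup ordering used above (sketch):
 *
 *	drbd_flush_workqueue();   - required so drbd_finish_peer_reqs()
 *	                            can reclaim net_ee
 *	drbd_finish_peer_reqs();  - reap peer requests, may queue work
 *	drbd_flush_workqueue();   - drain what that queued
 *	drbd_rs_cancel_all();     - catch LRU entries repopulated above
 */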
4711
4712/*
4713 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4714 * we can agree on is stored in agreed_pro_version.
4715 *
4716 * Feature flags and the reserved array should be enough room for future
4717 * enhancements of the handshake protocol and possible plugins...
4718 *
4719 * For now they are expected to be zero, but we ignore them in any case.
4720 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004721static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004722{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004723 struct drbd_socket *sock;
4724 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004725
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004726 sock = &connection->data;
4727 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004728 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004729 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004730 memset(p, 0, sizeof(*p));
4731 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4732 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004733 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004734 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735}
4736
4737/*
4738 * return values:
4739 * 1 yes, we have a valid connection
4740 * 0 oops, did not work out, please try again
4741 * -1 peer talks different language,
 * no point in trying again, please go standalone.
 */
static int drbd_do_features(struct drbd_connection *connection)
{
        /* ASSERT current == connection->receiver ... */
        struct p_connection_features *p;
        const int expect = sizeof(struct p_connection_features);
        struct packet_info pi;
        int err;

        err = drbd_send_features(connection);
        if (err)
                return 0;

        err = drbd_recv_header(connection, &pi);
        if (err)
                return 0;

        if (pi.cmd != P_CONNECTION_FEATURES) {
                drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
                return -1;
        }

        if (pi.size != expect) {
                drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
                         expect, pi.size);
                return -1;
        }

        p = pi.data;
        err = drbd_recv_all_warn(connection, p, expect);
        if (err)
                return 0;

        p->protocol_min = be32_to_cpu(p->protocol_min);
        p->protocol_max = be32_to_cpu(p->protocol_max);
        if (p->protocol_max == 0)
                p->protocol_max = p->protocol_min;

        if (PRO_VERSION_MAX < p->protocol_min ||
            PRO_VERSION_MIN > p->protocol_max)
                goto incompat;

        connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
        connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

        drbd_info(connection, "Handshake successful: "
                  "Agreed network protocol version %d\n", connection->agreed_pro_version);

        drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
                  connection->agreed_features & FF_TRIM ? " " : " not ");

        return 1;

 incompat:
        drbd_err(connection, "incompatible DRBD dialects: "
                 "I support %d-%d, peer supports %d-%d\n",
                 PRO_VERSION_MIN, PRO_VERSION_MAX,
                 p->protocol_min, p->protocol_max);
        return -1;
}
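
/*
 * Illustrative sketch only (hypothetical helper, not part of the driver):
 * the agreement rule coded above, reduced to a pure function.  local_min
 * and local_max stand in for PRO_VERSION_MIN/PRO_VERSION_MAX.
 */
#if 0
static int example_agree_version(int local_min, int local_max,
                                 int peer_min, int peer_max)
{
        if (peer_max == 0)              /* very old peers send max == 0 */
                peer_max = peer_min;
        if (local_max < peer_min || local_min > peer_max)
                return -1;              /* disjoint ranges: the incompat path */
        /* otherwise the highest version both sides support wins */
        return local_max < peer_max ? local_max : peer_max;
}
#endif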

#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_connection *connection)
{
        drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
        drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
        return -1;
}
#else
#define CHALLENGE_LEN 64

/* Return value:
        1 - auth succeeded,
        0 - failed, try again (network error),
        -1 - auth failed, don't try again.
*/

static int drbd_do_auth(struct drbd_connection *connection)
{
        struct drbd_socket *sock;
        char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
        struct scatterlist sg;
        char *response = NULL;
        char *right_response = NULL;
        char *peers_ch = NULL;
        unsigned int key_len;
        char secret[SHARED_SECRET_MAX]; /* 64 bytes */
        unsigned int resp_size;
        struct hash_desc desc;
        struct packet_info pi;
        struct net_conf *nc;
        int err, rv;

        /* FIXME: Put the challenge/response into the preallocated socket buffer. */

        rcu_read_lock();
        nc = rcu_dereference(connection->net_conf);
        key_len = strlen(nc->shared_secret);
        memcpy(secret, nc->shared_secret, key_len);
        rcu_read_unlock();

        desc.tfm = connection->cram_hmac_tfm;
        desc.flags = 0;

        rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
        if (rv) {
                drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
                rv = -1;
                goto fail;
        }

        get_random_bytes(my_challenge, CHALLENGE_LEN);

        sock = &connection->data;
        if (!conn_prepare_command(connection, sock)) {
                rv = 0;
                goto fail;
        }
        rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
                                my_challenge, CHALLENGE_LEN);
        if (!rv)
                goto fail;

        err = drbd_recv_header(connection, &pi);
        if (err) {
                rv = 0;
                goto fail;
        }

        if (pi.cmd != P_AUTH_CHALLENGE) {
                drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
                rv = 0;
                goto fail;
        }

        if (pi.size > CHALLENGE_LEN * 2) {
                drbd_err(connection, "AuthChallenge payload too big.\n");
                rv = -1;
                goto fail;
        }

        if (pi.size < CHALLENGE_LEN) {
                drbd_err(connection, "AuthChallenge payload too small.\n");
                rv = -1;
                goto fail;
        }

        peers_ch = kmalloc(pi.size, GFP_NOIO);
        if (peers_ch == NULL) {
                drbd_err(connection, "kmalloc of peers_ch failed\n");
                rv = -1;
                goto fail;
        }

        err = drbd_recv_all_warn(connection, peers_ch, pi.size);
        if (err) {
                rv = 0;
                goto fail;
        }

        if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
                drbd_err(connection, "Peer presented the same challenge!\n");
                rv = -1;
                goto fail;
        }

        resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
        response = kmalloc(resp_size, GFP_NOIO);
        if (response == NULL) {
                drbd_err(connection, "kmalloc of response failed\n");
                rv = -1;
                goto fail;
        }

        sg_init_table(&sg, 1);
        sg_set_buf(&sg, peers_ch, pi.size);

        rv = crypto_hash_digest(&desc, &sg, sg.length, response);
        if (rv) {
                drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
                rv = -1;
                goto fail;
        }

        if (!conn_prepare_command(connection, sock)) {
                rv = 0;
                goto fail;
        }
        rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
                                response, resp_size);
        if (!rv)
                goto fail;

        err = drbd_recv_header(connection, &pi);
        if (err) {
                rv = 0;
                goto fail;
        }

        if (pi.cmd != P_AUTH_RESPONSE) {
                drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
                         cmdname(pi.cmd), pi.cmd);
                rv = 0;
                goto fail;
        }

        if (pi.size != resp_size) {
                drbd_err(connection, "AuthResponse payload has wrong size\n");
                rv = 0;
                goto fail;
        }

        err = drbd_recv_all_warn(connection, response, resp_size);
        if (err) {
                rv = 0;
                goto fail;
        }

        right_response = kmalloc(resp_size, GFP_NOIO);
        if (right_response == NULL) {
                drbd_err(connection, "kmalloc of right_response failed\n");
                rv = -1;
                goto fail;
        }

        sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

        rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
        if (rv) {
                drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
                rv = -1;
                goto fail;
        }

        rv = !memcmp(response, right_response, resp_size);

        if (rv)
                drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
                          resp_size);
        else
                rv = -1;

 fail:
        kfree(peers_ch);
        kfree(response);
        kfree(right_response);

        return rv;
}
#endif

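/*
 * Illustrative sketch (hypothetical names, not part of the driver): the
 * challenge/response shape implemented above.  Both sides hold the same
 * shared secret; each proves knowledge of it by returning
 * HMAC(secret, peer's random challenge).  hmac_example() stands in for the
 * crypto_hash_setkey()/crypto_hash_digest() pair used in drbd_do_auth().
 */
#if 0
/* assumed primitive: digest = HMAC(key, msg) */
void hmac_example(const u8 *key, unsigned int key_len,
                  const u8 *msg, unsigned int msg_len, u8 *digest);

static bool example_cram_verify(const u8 *secret, unsigned int key_len,
                                const u8 my_challenge[CHALLENGE_LEN],
                                const u8 *peers_response, unsigned int resp_size)
{
        u8 right_response[64];  /* large enough for the agreed digest */

        /* recompute what an authentic peer must have sent ... */
        hmac_example(secret, key_len, my_challenge, CHALLENGE_LEN, right_response);
        /* ... and compare; a mismatch means auth failed, don't retry */
        return memcmp(right_response, peers_response, resp_size) == 0;
}
#endif
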
int drbd_receiver(struct drbd_thread *thi)
{
        struct drbd_connection *connection = thi->connection;
        int h;

        drbd_info(connection, "receiver (re)started\n");

        do {
                h = conn_connect(connection);
                if (h == 0) {
                        conn_disconnect(connection);
                        schedule_timeout_interruptible(HZ);
                }
                if (h == -1) {
                        drbd_warn(connection, "Discarding network configuration.\n");
                        conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
                }
        } while (h == 0);

        if (h > 0)
                drbdd(connection);

        conn_disconnect(connection);

        drbd_info(connection, "receiver terminated\n");
        return 0;
}

/* ********* acknowledge sender ******** */

static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
        struct p_req_state_reply *p = pi->data;
        int retcode = be32_to_cpu(p->retcode);

        if (retcode >= SS_SUCCESS) {
                set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
        } else {
                set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
                drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
                         drbd_set_st_err_str(retcode), retcode);
        }
        wake_up(&connection->ping_wait);

        return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        struct p_req_state_reply *p = pi->data;
        int retcode = be32_to_cpu(p->retcode);

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
                D_ASSERT(device, connection->agreed_pro_version < 100);
                return got_conn_RqSReply(connection, pi);
        }

        if (retcode >= SS_SUCCESS) {
                set_bit(CL_ST_CHG_SUCCESS, &device->flags);
        } else {
                set_bit(CL_ST_CHG_FAIL, &device->flags);
                drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
                         drbd_set_st_err_str(retcode), retcode);
        }
        wake_up(&device->state_wait);

        return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
        return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
        /* restore idle timeout */
        connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int * HZ;
        if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
                wake_up(&connection->ping_wait);

        return 0;
}

static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
        int blksize = be32_to_cpu(p->blksize);

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

        update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

        if (get_ldev(device)) {
                drbd_rs_complete_io(device, sector);
                drbd_set_in_sync(device, sector, blksize);
                /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
                device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
                put_ldev(device);
        }
        dec_rs_pending(device);
        atomic_add(blksize >> 9, &device->rs_sect_in);

        return 0;
}

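/*
 * Helper for the ack handlers below: look up the pending request identified
 * by (id, sector) in the given rb tree, apply the request state event @what
 * to it, and complete the master bio if that transition finished the request.
 * Returns -EIO if the request cannot be found and @missing_ok is not set.
 */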
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
                              struct rb_root *root, const char *func,
                              enum drbd_req_event what, bool missing_ok)
{
        struct drbd_request *req;
        struct bio_and_error m;

        spin_lock_irq(&device->resource->req_lock);
        req = find_request(device, root, id, sector, missing_ok, func);
        if (unlikely(!req)) {
                spin_unlock_irq(&device->resource->req_lock);
                return -EIO;
        }
        __req_mod(req, what, &m);
        spin_unlock_irq(&device->resource->req_lock);

        if (m.bio)
                complete_master_bio(device, &m);
        return 0;
}

static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
        int blksize = be32_to_cpu(p->blksize);
        enum drbd_req_event what;

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

        if (p->block_id == ID_SYNCER) {
                drbd_set_in_sync(device, sector, blksize);
                dec_rs_pending(device);
                return 0;
        }
        switch (pi->cmd) {
        case P_RS_WRITE_ACK:
                what = WRITE_ACKED_BY_PEER_AND_SIS;
                break;
        case P_WRITE_ACK:
                what = WRITE_ACKED_BY_PEER;
                break;
        case P_RECV_ACK:
                what = RECV_ACKED_BY_PEER;
                break;
        case P_SUPERSEDED:
                what = CONFLICT_RESOLVED;
                break;
        case P_RETRY_WRITE:
                what = POSTPONE_WRITE;
                break;
        default:
                BUG();
        }

        return validate_req_change_req_state(device, p->block_id, sector,
                                             &device->write_requests, __func__,
                                             what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);
        int size = be32_to_cpu(p->blksize);
        int err;

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

        if (p->block_id == ID_SYNCER) {
                dec_rs_pending(device);
                drbd_rs_failed_io(device, sector, size);
                return 0;
        }

        err = validate_req_change_req_state(device, p->block_id, sector,
                                            &device->write_requests, __func__,
                                            NEG_ACKED, true);
        if (err) {
                /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
                   The master bio might already be completed, therefore the
                   request is no longer in the collision hash. */
                /* In Protocol B we might already have got a P_RECV_ACK
                   but then get a P_NEG_ACK afterwards. */
                drbd_set_out_of_sync(device, sector, size);
        }
        return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        sector_t sector = be64_to_cpu(p->sector);

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

        drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
                 (unsigned long long)sector, be32_to_cpu(p->blksize));

        return validate_req_change_req_state(device, p->block_id, sector,
                                             &device->read_requests, __func__,
                                             NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        sector_t sector;
        int size;
        struct p_block_ack *p = pi->data;

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        sector = be64_to_cpu(p->sector);
        size = be32_to_cpu(p->blksize);

        update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

        dec_rs_pending(device);

        if (get_ldev_if_state(device, D_FAILED)) {
                drbd_rs_complete_io(device, sector);
                switch (pi->cmd) {
                case P_NEG_RS_DREPLY:
                        drbd_rs_failed_io(device, sector, size);
                        /* fall through */
                case P_RS_CANCEL:
                        break;
                default:
                        BUG();
                }
                put_ldev(device);
        }

        return 0;
}

static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
        struct p_barrier_ack *p = pi->data;
        struct drbd_peer_device *peer_device;
        int vnr;

        tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

        rcu_read_lock();
        idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                struct drbd_device *device = peer_device->device;

                if (device->state.conn == C_AHEAD &&
                    atomic_read(&device->ap_in_flight) == 0 &&
                    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
                        device->start_resync_timer.expires = jiffies + HZ;
                        add_timer(&device->start_resync_timer);
                }
        }
        rcu_read_unlock();

        return 0;
}

static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
        struct drbd_peer_device *peer_device;
        struct drbd_device *device;
        struct p_block_ack *p = pi->data;
        struct drbd_device_work *dw;
        sector_t sector;
        int size;

        peer_device = conn_peer_device(connection, pi->vnr);
        if (!peer_device)
                return -EIO;
        device = peer_device->device;

        sector = be64_to_cpu(p->sector);
        size = be32_to_cpu(p->blksize);

        update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

        if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
                drbd_ov_out_of_sync_found(device, sector, size);
        else
                ov_out_of_sync_print(device);

        if (!get_ldev(device))
                return 0;

        drbd_rs_complete_io(device, sector);
        dec_rs_pending(device);

        --device->ov_left;

        /* let's advance progress step marks only for every other megabyte */
        if ((device->ov_left & 0x200) == 0x200)
                drbd_advance_rs_marks(device, device->ov_left);

        if (device->ov_left == 0) {
                dw = kmalloc(sizeof(*dw), GFP_NOIO);
                if (dw) {
                        dw->w.cb = w_ov_finished;
                        dw->device = device;
                        drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
                } else {
                        drbd_err(device, "kmalloc(dw) failed.\n");
                        ov_out_of_sync_print(device);
                        drbd_resync_finished(device);
                }
        }
        put_ldev(device);
        return 0;
}

/* Consume and ignore: used for P_DELAY_PROBE packets, see asender_tbl[]. */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
        return 0;
}

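/*
 * Drain all completed peer requests (done_ee) on every volume of this
 * connection, looping until no new completions have queued up meanwhile.
 * Returns 1 if finishing requests on some device failed, 0 otherwise.
 */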
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
        struct drbd_peer_device *peer_device;
        int vnr, not_empty = 0;

        do {
                clear_bit(SIGNAL_ASENDER, &connection->flags);
                flush_signals(current);

                rcu_read_lock();
                idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                        struct drbd_device *device = peer_device->device;
                        kref_get(&device->kref);
                        rcu_read_unlock();
                        if (drbd_finish_peer_reqs(device)) {
                                kref_put(&device->kref, drbd_destroy_device);
                                return 1;
                        }
                        kref_put(&device->kref, drbd_destroy_device);
                        rcu_read_lock();
                }
                set_bit(SIGNAL_ASENDER, &connection->flags);

                spin_lock_irq(&connection->resource->req_lock);
                idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
                        struct drbd_device *device = peer_device->device;
                        not_empty = !list_empty(&device->done_ee);
                        if (not_empty)
                                break;
                }
                spin_unlock_irq(&connection->resource->req_lock);
                rcu_read_unlock();
        } while (not_empty);

        return 0;
}

struct asender_cmd {
        size_t pkt_size;
        int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static struct asender_cmd asender_tbl[] = {
        [P_PING]              = { 0, got_Ping },
        [P_PING_ACK]          = { 0, got_PingAck },
        [P_RECV_ACK]          = { sizeof(struct p_block_ack), got_BlockAck },
        [P_WRITE_ACK]         = { sizeof(struct p_block_ack), got_BlockAck },
        [P_RS_WRITE_ACK]      = { sizeof(struct p_block_ack), got_BlockAck },
        [P_SUPERSEDED]        = { sizeof(struct p_block_ack), got_BlockAck },
        [P_NEG_ACK]           = { sizeof(struct p_block_ack), got_NegAck },
        [P_NEG_DREPLY]        = { sizeof(struct p_block_ack), got_NegDReply },
        [P_NEG_RS_DREPLY]     = { sizeof(struct p_block_ack), got_NegRSDReply },
        [P_OV_RESULT]         = { sizeof(struct p_block_ack), got_OVResult },
        [P_BARRIER_ACK]       = { sizeof(struct p_barrier_ack), got_BarrierAck },
        [P_STATE_CHG_REPLY]   = { sizeof(struct p_req_state_reply), got_RqSReply },
        [P_RS_IS_IN_SYNC]     = { sizeof(struct p_block_ack), got_IsInSync },
        [P_DELAY_PROBE]       = { sizeof(struct p_delay_probe93), got_skip },
        [P_RS_CANCEL]         = { sizeof(struct p_block_ack), got_NegRSDReply },
        [P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
        [P_RETRY_WRITE]       = { sizeof(struct p_block_ack), got_BlockAck },
};

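/*
 * Illustrative sketch (hypothetical packet and handler, not part of the
 * driver): extending the dispatch table above.  Each meta packet needs a
 * handler with the common signature and one table entry giving its fixed
 * payload size; drbd_asender() below receives exactly pkt_size bytes after
 * the header and bounds-checks pi.cmd against ARRAY_SIZE(asender_tbl).
 */
#if 0
static int got_Example(struct drbd_connection *connection, struct packet_info *pi)
{
        struct p_block_ack *p = pi->data;       /* payload already received */

        /* act on the packet; 0 means success, non-zero forces a reconnect */
        return 0;
}

        /* additional entry in asender_tbl[]: */
        [P_EXAMPLE]           = { sizeof(struct p_block_ack), got_Example },
#endif
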
int drbd_asender(struct drbd_thread *thi)
{
        struct drbd_connection *connection = thi->connection;
        struct asender_cmd *cmd = NULL;
        struct packet_info pi;
        int rv;
        void *buf = connection->meta.rbuf;
        int received = 0;
        unsigned int header_size = drbd_header_size(connection);
        int expect = header_size;
        bool ping_timeout_active = false;
        struct net_conf *nc;
        int ping_timeo, tcp_cork, ping_int;
        struct sched_param param = { .sched_priority = 2 };

        rv = sched_setscheduler(current, SCHED_RR, &param);
        if (rv < 0)
                drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

        while (get_t_state(thi) == RUNNING) {
                drbd_thread_current_set_cpu(thi);

                rcu_read_lock();
                nc = rcu_dereference(connection->net_conf);
                ping_timeo = nc->ping_timeo;
                tcp_cork = nc->tcp_cork;
                ping_int = nc->ping_int;
                rcu_read_unlock();

                if (test_and_clear_bit(SEND_PING, &connection->flags)) {
                        if (drbd_send_ping(connection)) {
                                drbd_err(connection, "drbd_send_ping has failed\n");
                                goto reconnect;
                        }
                        connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
                        ping_timeout_active = true;
                }

                /* TODO: conditionally cork; it may hurt latency if we cork without
                   much to send */
                if (tcp_cork)
                        drbd_tcp_cork(connection->meta.socket);
                if (connection_finish_peer_reqs(connection)) {
                        drbd_err(connection, "connection_finish_peer_reqs() failed\n");
                        goto reconnect;
                }
                /* but unconditionally uncork unless disabled */
                if (tcp_cork)
                        drbd_tcp_uncork(connection->meta.socket);

                /* short circuit, recv_msg would return EINTR anyways. */
                if (signal_pending(current))
                        continue;

                rv = drbd_recv_short(connection->meta.socket, buf, expect - received, 0);
                clear_bit(SIGNAL_ASENDER, &connection->flags);

                flush_signals(current);

                /* Note:
                 * -EINTR        (on meta) we got a signal
                 * -EAGAIN       (on meta) rcvtimeo expired
                 * -ECONNRESET   other side closed the connection
                 * -ERESTARTSYS  (on data) we got a signal
                 * rv <  0       other than above: unexpected error!
                 * rv == expected: full header or command
                 * rv <  expected: "woken" by signal during receive
                 * rv == 0       : "connection shut down by peer"
                 */
                if (likely(rv > 0)) {
                        received += rv;
                        buf      += rv;
                } else if (rv == 0) {
                        if (test_bit(DISCONNECT_SENT, &connection->flags)) {
                                long t;
                                rcu_read_lock();
                                t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
                                rcu_read_unlock();

                                t = wait_event_timeout(connection->ping_wait,
                                                       connection->cstate < C_WF_REPORT_PARAMS,
                                                       t);
                                if (t)
                                        break;
                        }
                        drbd_err(connection, "meta connection shut down by peer.\n");
                        goto reconnect;
                } else if (rv == -EAGAIN) {
                        /* If the data socket received something meanwhile,
                         * that is good enough: peer is still alive. */
                        if (time_after(connection->last_received,
                                jiffies - connection->meta.socket->sk->sk_rcvtimeo))
                                continue;
                        if (ping_timeout_active) {
                                drbd_err(connection, "PingAck did not arrive in time.\n");
                                goto reconnect;
                        }
                        set_bit(SEND_PING, &connection->flags);
                        continue;
                } else if (rv == -EINTR) {
                        continue;
                } else {
                        drbd_err(connection, "sock_recvmsg returned %d\n", rv);
                        goto reconnect;
                }

                if (received == expect && cmd == NULL) {
                        if (decode_header(connection, connection->meta.rbuf, &pi))
                                goto reconnect;
                        /* validate pi.cmd before indexing into the table */
                        if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[pi.cmd].fn) {
                                drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
                                         cmdname(pi.cmd), pi.cmd);
                                goto disconnect;
                        }
                        cmd = &asender_tbl[pi.cmd];
                        expect = header_size + cmd->pkt_size;
                        if (pi.size != expect - header_size) {
                                drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
                                        pi.cmd, pi.size);
                                goto reconnect;
                        }
                }
                if (received == expect) {
                        bool err;

                        err = cmd->fn(connection, &pi);
                        if (err) {
                                drbd_err(connection, "%pf failed\n", cmd->fn);
                                goto reconnect;
                        }

                        connection->last_received = jiffies;

                        if (cmd == &asender_tbl[P_PING_ACK]) {
                                /* restore idle timeout */
                                connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
                                ping_timeout_active = false;
                        }

                        buf      = connection->meta.rbuf;
                        received = 0;
                        expect   = header_size;
                        cmd      = NULL;
                }
        }

        if (0) {
reconnect:
                conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
                conn_md_sync(connection);
        }
        if (0) {
disconnect:
                conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
        }
        clear_bit(SIGNAL_ASENDER, &connection->flags);

        drbd_info(connection, "asender terminated\n");

        return 0;
}