/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}

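/* Illustrative sketch only, compiled out: how the helpers above are meant to
 * be combined.  A batch is spliced off the shared pool under the pool lock,
 * used lock-free (the chain is private while borrowed), then spliced back.
 * drbd_pp_pool, drbd_pp_lock and drbd_pp_vacant are the globals used by
 * __drbd_alloc_pages() below; the function name here is hypothetical. */
#if 0
static void example_borrow_pages(unsigned int n)
{
	struct page *batch, *tail;

	spin_lock(&drbd_pp_lock);
	batch = page_chain_del(&drbd_pp_pool, n);	/* NULL if fewer than n linked */
	if (batch)
		drbd_pp_vacant -= n;
	spin_unlock(&drbd_pp_lock);
	if (!batch)
		return;

	/* ... use the n pages, chained via page->private ... */

	tail = page_chain_tail(batch, NULL);		/* may run without the lock */
	spin_lock(&drbd_pp_lock);
	page_chain_add(&drbd_pp_pool, batch, tail);	/* splice back at the head */
	drbd_pp_vacant += n;
	spin_unlock(&drbd_pp_lock);
}
#endif
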
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first one that has not
	   finished, we can stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}

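/* A minimal caller-side sketch, compiled out: allocate a chain for the
 * receive path and hand it back through drbd_free_pages() below.  The
 * retry=true case blocks (throttled, as described above) until either the
 * pages arrive or the caller is signalled; drbd_alloc_peer_req() below
 * derives retry from "(gfp_mask & __GFP_WAIT)" instead.  The function name
 * here is hypothetical. */
#if 0
static int example_fill_buffer(struct drbd_peer_device *peer_device,
			       unsigned int data_size)
{
	unsigned int nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct page *page;

	page = drbd_alloc_pages(peer_device, nr_pages, true /* retry */);
	if (!page)
		return -EINTR;	/* with retry=true, NULL means we were signalled */

	/* ... receive the payload into the chained pages ... */

	drbd_free_pages(peer_device->device, page, 0);
	return 0;
}
#endif
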
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	might_sleep();
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	if (!expect(!(peer_req->flags & EE_CALL_AL_COMPLETE_IO))) {
		peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
		drbd_al_complete_io(device, &peer_req->i);
	}
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

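/* Userspace analogue of the tcp(7) rule quoted above, compiled out and for
 * illustration only (assumes <sys/socket.h>; the function name is
 * hypothetical).  SO_SNDBUF/SO_RCVBUF must be set before connect(2) or
 * listen(2), which is exactly why drbd_try_connect() and
 * prepare_listen_socket() below call drbd_setbufsize() first. */
#if 0
#include <sys/socket.h>

static int example_connect_with_bufsize(int fd, const struct sockaddr *addr,
					socklen_t addrlen, int snd, int rcv)
{
	if (snd && setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
		return -1;
	if (rcv && setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
		return -1;
	return connect(fd, addr, addrlen);	/* buffer sizes now take effect */
}
#endif
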
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);
};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

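/* The hook above is callback interposition on a kernel socket: stash the
 * original sk_state_change, point the socket at our own handler, and have
 * the handler ring the door_bell completion before chaining to the
 * original.  A compiled-out sketch of the install side, mirroring what
 * prepare_listen_socket() below does under sk_callback_lock (the function
 * name here is hypothetical): */
#if 0
static void example_install_hook(struct socket *s, struct accept_wait_data *ad)
{
	write_lock_bh(&s->sk->sk_callback_lock);	/* serialize vs. softirq callbacks */
	ad->original_sk_state_change = s->sk->sk_state_change;
	s->sk->sk_state_change = drbd_incoming_connection;
	s->sk->sk_user_data = ad;			/* lets the hook find its context */
	write_unlock_bh(&s->sk->sk_callback_lock);
}
#endif
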
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

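/* Why "28.5%" above: the jitter is +/- timeo/7, i.e. a total spread of 2/7
 * of the nominal value (2/7 ~= 28.6%).  Worked example with illustrative
 * values only, connect_int = 10s and HZ = 250:
 *
 *	timeo   = 10 * 250 = 2500 jiffies
 *	timeo/7 = 357      => wait either 2857 or 2143 jiffies
 *
 * Both peers dial out and listen at the same time; skewing their timeouts
 * keeps them from retrying in lockstep and repeatedly crossing their
 * connection attempts. */
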
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200785static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700786
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200787static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200788 enum drbd_packet cmd)
789{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200790 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200791 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200792 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700793}
794
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200795static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700796{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200797 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200798 struct packet_info pi;
Philipp Reisner4920e372014-03-18 14:40:13 +0100799 struct net_conf *nc;
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200800 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700801
Philipp Reisner4920e372014-03-18 14:40:13 +0100802 rcu_read_lock();
803 nc = rcu_dereference(connection->net_conf);
804 if (!nc) {
805 rcu_read_unlock();
806 return -EIO;
807 }
808 sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
809 rcu_read_unlock();
810
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200811 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200812 if (err != header_size) {
813 if (err >= 0)
814 err = -EIO;
815 return err;
816 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200817 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200818 if (err)
819 return err;
820 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700821}
822
823/**
824 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700825 * @sock: pointer to the pointer to the socket.
826 */
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100827static bool drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828{
829 int rr;
830 char tb[4];
831
832 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100833 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700834
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100835 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700836
837 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100838 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700839 } else {
840 sock_release(*sock);
841 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100842 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700843 }
844}
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100845
846static bool connection_established(struct drbd_connection *connection,
847 struct socket **sock1,
848 struct socket **sock2)
849{
850 struct net_conf *nc;
851 int timeout;
852 bool ok;
853
854 if (!*sock1 || !*sock2)
855 return false;
856
857 rcu_read_lock();
858 nc = rcu_dereference(connection->net_conf);
859 timeout = (nc->sock_check_timeo ?: nc->ping_timeo) * HZ / 10;
860 rcu_read_unlock();
861 schedule_timeout_interruptible(timeout);
862
863 ok = drbd_socket_okay(sock1);
864 ok = drbd_socket_okay(sock2) && ok;
865
866 return ok;
867}
868
Philipp Reisner2325eb62011-03-15 16:56:18 +0100869/* Gets called if a connection is established, or if a new minor gets created
870 in a connection */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200871int drbd_connected(struct drbd_peer_device *peer_device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100872{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200873 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100874 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100875
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200876 atomic_set(&device->packet_seq, 0);
877 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100878
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200879 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
880 &peer_device->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200881 &device->own_state_mutex;
Philipp Reisner8410da82011-02-11 20:11:10 +0100882
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200883 err = drbd_send_sync_param(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100884 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200885 err = drbd_send_sizes(peer_device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100886 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200887 err = drbd_send_uuids(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100888 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200889 err = drbd_send_current_state(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200890 clear_bit(USE_DEGR_WFC_T, &device->flags);
891 clear_bit(RESIZE_PENDING, &device->flags);
892 atomic_set(&device->ap_in_flight, 0);
893 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100894 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100895}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700896
897/*
898 * return values:
899 * 1 yes, we have a valid connection
900 * 0 oops, did not work out, please try again
901 * -1 peer talks different language,
902 * no point in trying again, please go standalone.
903 * -2 We do not have a network config...
904 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200905static int conn_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700906{
Philipp Reisner7da35862011-12-19 22:42:56 +0100907 struct drbd_socket sock, msock;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +0200908 struct drbd_peer_device *peer_device;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200909 struct net_conf *nc;
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100910 int vnr, timeout, h;
911 bool discard_my_data, ok;
Philipp Reisner197296f2012-03-26 16:47:11 +0200912 enum drbd_state_rv rv;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200913 struct accept_wait_data ad = {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200914 .connection = connection,
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200915 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
916 };
Philipp Reisnerb411b362009-09-25 16:07:19 -0700917
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200918 clear_bit(DISCONNECT_SENT, &connection->flags);
919 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700920 return -2;
921
Philipp Reisner7da35862011-12-19 22:42:56 +0100922 mutex_init(&sock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200923 sock.sbuf = connection->data.sbuf;
924 sock.rbuf = connection->data.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100925 sock.socket = NULL;
926 mutex_init(&msock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200927 msock.sbuf = connection->meta.sbuf;
928 msock.rbuf = connection->meta.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100929 msock.socket = NULL;
930
Andreas Gruenbacher0916e0e2011-03-21 14:10:15 +0100931 /* Assume that the peer only understands protocol 80 until we know better. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200932 connection->agreed_pro_version = 80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700933
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200934 if (prepare_listen_socket(connection, &ad))
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200935 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700936
937 do {
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200938 struct socket *s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700939
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200940 s = drbd_try_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700941 if (s) {
Philipp Reisner7da35862011-12-19 22:42:56 +0100942 if (!sock.socket) {
943 sock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200944 send_first_packet(connection, &sock, P_INITIAL_DATA);
Philipp Reisner7da35862011-12-19 22:42:56 +0100945 } else if (!msock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200946 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100947 msock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200948 send_first_packet(connection, &msock, P_INITIAL_META);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700949 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200950 drbd_err(connection, "Logic error in conn_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700951 goto out_release_sockets;
952 }
953 }
954
Philipp Reisner5d0b17f2014-03-18 14:24:35 +0100955 if (connection_established(connection, &sock.socket, &msock.socket))
956 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700957
958retry:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200959 s = drbd_wait_for_connect(connection, &ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700960 if (s) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200961 int fp = receive_first_packet(connection, s);
Philipp Reisner7da35862011-12-19 22:42:56 +0100962 drbd_socket_okay(&sock.socket);
963 drbd_socket_okay(&msock.socket);
Philipp Reisner92f14952012-08-01 11:41:01 +0200964 switch (fp) {
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200965 case P_INITIAL_DATA:
Philipp Reisner7da35862011-12-19 22:42:56 +0100966 if (sock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200967 drbd_warn(connection, "initial packet S crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100968 sock_release(sock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200969 sock.socket = s;
970 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700971 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100972 sock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700973 break;
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200974 case P_INITIAL_META:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200975 set_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100976 if (msock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200977 drbd_warn(connection, "initial packet M crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100978 sock_release(msock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200979 msock.socket = s;
980 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100982 msock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700983 break;
984 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200985 drbd_warn(connection, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700986 sock_release(s);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200987randomize:
Akinobu Mita38b682b22013-04-29 16:21:31 -0700988 if (prandom_u32() & 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700989 goto retry;
990 }
991 }
992
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200993 if (connection->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700994 goto out_release_sockets;
995 if (signal_pending(current)) {
996 flush_signals(current);
997 smp_rmb();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200998 if (get_t_state(&connection->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700999 goto out_release_sockets;
1000 }
1001
Philipp Reisner5d0b17f2014-03-18 14:24:35 +01001002 ok = connection_established(connection, &sock.socket, &msock.socket);
Philipp Reisnerb666dbf2012-07-26 14:12:59 +02001003 } while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001004
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001005 if (ad.s_listen)
1006 sock_release(ad.s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001007
Philipp Reisner98683652012-11-09 14:18:43 +01001008 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
1009 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010
Philipp Reisner7da35862011-12-19 22:42:56 +01001011 sock.socket->sk->sk_allocation = GFP_NOIO;
1012 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001013
	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}

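/* Editor's note: a sketch of the three on-the-wire header formats that
 * decode_header() below distinguishes, inferred from the accessors used
 * (exact field order and padding are defined in drbd_protocol.h, not
 * shown here):
 *
 *   p_header100: __be32 magic (DRBD_MAGIC_100), __be16 volume,
 *                __be16 command, __be32 length, plus a pad that must be 0
 *   p_header95:  __be16 magic (DRBD_MAGIC_BIG), __be16 command,
 *                __be32 length
 *   p_header80:  __be32 magic (DRBD_MAGIC), __be16 command,
 *                __be16 length
 *
 * Only the header100 format carries a volume number; the older formats
 * imply volume 0.
 */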
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	pi->data = header + header_size;
	return 0;
}

static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int err;

	err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
	if (err)
		return err;

	err = decode_header(connection, buffer, pi);
	connection->last_received = jiffies;

	return err;
}

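/* Editor's note: drbd_flush() below (like conn_wait_active_ee_empty()
 * further down) walks the peer device idr under rcu, but must not block
 * inside an rcu read side section.  It therefore pins each device with a
 * kref, drops the rcu read lock around the blocking call, and re-acquires
 * it before continuing the walk. */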
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}

/**
 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
 * @connection: DRBD connection.
 * @epoch: Epoch object.
 * @ev: Epoch event.
 */
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
					       struct drbd_epoch *epoch,
					       enum epoch_event ev)
{
	int epoch_size;
	struct drbd_epoch *next_epoch;
	enum finish_epoch rv = FE_STILL_LIVE;

	spin_lock(&connection->epoch_lock);
	do {
		next_epoch = NULL;

		epoch_size = atomic_read(&epoch->epoch_size);

		switch (ev & ~EV_CLEANUP) {
		case EV_PUT:
			atomic_dec(&epoch->active);
			break;
		case EV_GOT_BARRIER_NR:
			set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
			break;
		case EV_BECAME_LAST:
			/* nothing to do */
			break;
		}

		if (epoch_size != 0 &&
		    atomic_read(&epoch->active) == 0 &&
		    (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
			if (!(ev & EV_CLEANUP)) {
				spin_unlock(&connection->epoch_lock);
				drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
				spin_lock(&connection->epoch_lock);
			}
#if 0
			/* FIXME: dec unacked on connection, once we have
			 * something to count pending connection packets in. */
			if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
				dec_unacked(epoch->connection);
#endif

			if (connection->current_epoch != epoch) {
				next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
				list_del(&epoch->list);
				ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
				connection->epochs--;
				kfree(epoch);

				if (rv == FE_STILL_LIVE)
					rv = FE_DESTROYED;
			} else {
				epoch->flags = 0;
				atomic_set(&epoch->epoch_size, 0);
				/* atomic_set(&epoch->active, 0); is already zero */
				if (rv == FE_STILL_LIVE)
					rv = FE_RECYCLED;
			}
		}

		if (!next_epoch)
			break;

		epoch = next_epoch;
	} while (1);

	spin_unlock(&connection->epoch_lock);

	return rv;
}

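/* Editor's note: max_allowed_wo() below clamps the requested write
 * ordering to what the backing device's configuration permits: with
 * disk_flushes disabled, "flush" degrades to "drain"; with disk_drain
 * also disabled, it degrades further to "none". */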
static enum write_ordering_e
max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
{
	struct disk_conf *dc;

	dc = rcu_dereference(bdev->disk_conf);

	if (wo == WO_bdev_flush && !dc->disk_flushes)
		wo = WO_drain_io;
	if (wo == WO_drain_io && !dc->disk_drain)
		wo = WO_none;

	return wo;
}

/**
 * drbd_bump_write_ordering() - Fall back to another write ordering method
 * @resource: DRBD resource.
 * @bdev: backing device to additionally take into account, or NULL.
 * @wo: Write ordering method to try.
 */
void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
			      enum write_ordering_e wo)
{
	struct drbd_device *device;
	enum write_ordering_e pwo;
	int vnr;
	static char *write_ordering_str[] = {
		[WO_none] = "none",
		[WO_drain_io] = "drain",
		[WO_bdev_flush] = "flush",
	};

	pwo = resource->write_ordering;
	if (wo != WO_bdev_flush)
		wo = min(pwo, wo);
	rcu_read_lock();
	idr_for_each_entry(&resource->devices, device, vnr) {
		if (get_ldev(device)) {
			wo = max_allowed_wo(device->ldev, wo);
			if (device->ldev == bdev)
				bdev = NULL;
			put_ldev(device);
		}
	}

	if (bdev)
		wo = max_allowed_wo(bdev, wo);

	rcu_read_unlock();

	resource->write_ordering = wo;
	if (pwo != resource->write_ordering || wo == WO_bdev_flush)
		drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
}

/**
 * drbd_submit_peer_request()
 * @device: DRBD device.
 * @peer_req: peer request
 * @rw: flag field, see bio->bi_rw
 *
 * May spread the pages to multiple bios,
 * depending on bio_add_page restrictions.
 *
 * Returns 0 if all bios have been submitted,
 * -ENOMEM if we could not allocate enough bios,
 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
 * single page to an empty bio (which should never happen and likely indicates
 * that the lower level IO stack is in some way broken). This has been observed
 * on certain Xen deployments.
 */
/* TODO allocate from our own bio_set. */
int drbd_submit_peer_request(struct drbd_device *device,
			     struct drbd_peer_request *peer_req,
			     const unsigned rw, const int fault_type)
{
	struct bio *bios = NULL;
	struct bio *bio;
	struct page *page = peer_req->pages;
	sector_t sector = peer_req->i.sector;
	unsigned ds = peer_req->i.size;
	unsigned n_bios = 0;
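	/* round up: the number of pages needed to cover ds bytes */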
	unsigned nr_pages = (ds + PAGE_SIZE - 1) >> PAGE_SHIFT;
	int err = -ENOMEM;

	if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
		/* wait for all pending IO completions, before we start
		 * zeroing things out. */
		conn_wait_active_ee_empty(first_peer_device(device)->connection);
		/* add it to the active list now,
		 * so we can find it to present it in debugfs */
		peer_req->submit_jif = jiffies;
		peer_req->flags |= EE_SUBMITTED;
		spin_lock_irq(&device->resource->req_lock);
		list_add_tail(&peer_req->w.list, &device->active_ee);
		spin_unlock_irq(&device->resource->req_lock);
		if (blkdev_issue_zeroout(device->ldev->backing_bdev,
			sector, ds >> 9, GFP_NOIO))
			peer_req->flags |= EE_WAS_ERROR;
		drbd_endio_write_sec_final(peer_req);
		return 0;
	}

	/* Discards don't have any payload.
	 * But the scsi layer still expects a bio_vec it can use internally,
	 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
	if (peer_req->flags & EE_IS_TRIM)
		nr_pages = 1;

	/* In most cases, we will only need one bio. But in case the lower
	 * level restrictions happen to be different at this offset on this
	 * side than those of the sending peer, we may need to submit the
	 * request in more than one bio.
	 *
	 * Plain bio_alloc is good enough here, this is no DRBD internally
	 * generated bio, but a bio allocated on behalf of the peer.
	 */
next_bio:
	bio = bio_alloc(GFP_NOIO, nr_pages);
	if (!bio) {
		drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
		goto fail;
	}
	/* > peer_req->i.sector, unless this is the first bio */
	bio->bi_iter.bi_sector = sector;
	bio->bi_bdev = device->ldev->backing_bdev;
	bio->bi_rw = rw;
	bio->bi_private = peer_req;
	bio->bi_end_io = drbd_peer_request_endio;

	bio->bi_next = bios;
	bios = bio;
	++n_bios;

	if (rw & REQ_DISCARD) {
		bio->bi_iter.bi_size = ds;
		goto submit;
	}

	page_chain_for_each(page) {
		unsigned len = min_t(unsigned, ds, PAGE_SIZE);
		if (!bio_add_page(bio, page, len, 0)) {
			/* A single page must always be possible!
			 * But in case it fails anyways,
			 * we deal with it, and complain (below). */
			if (bio->bi_vcnt == 0) {
				drbd_err(device,
					"bio_add_page failed for len=%u, "
					"bi_vcnt=0 (bi_sector=%llu)\n",
					len, (uint64_t)bio->bi_iter.bi_sector);
				err = -ENOSPC;
				goto fail;
			}
			goto next_bio;
		}
		ds -= len;
		sector += len >> 9;
		--nr_pages;
	}
	D_ASSERT(device, ds == 0);
submit:
	D_ASSERT(device, page == NULL);

	atomic_set(&peer_req->pending_bios, n_bios);
	/* for debugfs: update timestamp, mark as submitted */
	peer_req->submit_jif = jiffies;
	peer_req->flags |= EE_SUBMITTED;
	do {
		bio = bios;
		bios = bios->bi_next;
		bio->bi_next = NULL;

		drbd_generic_make_request(device, fault_type, bio);
	} while (bios);
	return 0;

fail:
	while (bios) {
		bio = bios;
		bios = bios->bi_next;
		bio_put(bio);
	}
	return err;
}

static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
					     struct drbd_peer_request *peer_req)
{
	struct drbd_interval *i = &peer_req->i;

	drbd_remove_interval(&device->write_requests, i);
	drbd_clear_interval(i);

	/* Wake up any processes waiting for this peer request to complete. */
	if (i->waiting)
		wake_up(&device->misc_wait);
}

static void conn_wait_active_ee_empty(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr;

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_wait_ee_list_empty(device, &device->active_ee);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();
}

static struct drbd_peer_device *
conn_peer_device(struct drbd_connection *connection, int volume_number)
{
	return idr_find(&connection->peer_devices, volume_number);
}

static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
{
	int rv;
	struct p_barrier *p = pi->data;
	struct drbd_epoch *epoch;

	/* FIXME these are unacked on connection,
	 * not a specific (peer)device.
	 */
	connection->current_epoch->barrier_nr = p->barrier;
	connection->current_epoch->connection = connection;
	rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);

	/* P_BARRIER_ACK may imply that the corresponding extent is dropped from
	 * the activity log, which means it would not be resynced in case the
	 * R_PRIMARY crashes now.
	 * Therefore we must send the barrier_ack after the barrier request was
	 * completed. */
	switch (connection->resource->write_ordering) {
	case WO_none:
		if (rv == FE_RECYCLED)
			return 0;

		/* receiver context, in the writeout path of the other node.
		 * avoid potential distributed deadlock */
		epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
		if (epoch)
			break;
		else
			drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
		/* Fall through */

	case WO_bdev_flush:
	case WO_drain_io:
		conn_wait_active_ee_empty(connection);
		drbd_flush(connection);

		if (atomic_read(&connection->current_epoch->epoch_size)) {
			epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
			if (epoch)
				break;
		}

		return 0;
	default:
		drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
			 connection->resource->write_ordering);
		return -EIO;
	}

	epoch->flags = 0;
	atomic_set(&epoch->epoch_size, 0);
	atomic_set(&epoch->active, 0);

	spin_lock(&connection->epoch_lock);
	if (atomic_read(&connection->current_epoch->epoch_size)) {
		list_add(&epoch->list, &connection->current_epoch->list);
		connection->current_epoch = epoch;
		connection->epochs++;
	} else {
		/* The current_epoch got recycled while we allocated this one... */
		kfree(epoch);
	}
	spin_unlock(&connection->epoch_lock);

	return 0;
}

/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust our peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	peer_req->flags |= EE_WRITE;
	if (trim)
		return peer_req;

	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}

/* drbd_drain_block() just takes a data block
 * out of the socket input buffer, and discards it.
 */
static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
{
	struct page *page;
	int err = 0;
	void *data;

	if (!data_size)
		return 0;

	page = drbd_alloc_pages(peer_device, 1, 1);

	data = kmap(page);
	while (data_size) {
		unsigned int len = min_t(int, data_size, PAGE_SIZE);

		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (err)
			break;
		data_size -= len;
	}
	kunmap(page);
	drbd_free_pages(peer_device->device, page, 0);
	return err;
}

static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
			   sector_t sector, int data_size)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct bio *bio;
	int dgs, err, expect;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;

	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return err;
		data_size -= dgs;
	}

	/* optimistically update recv_cnt. if receiving fails below,
	 * we disconnect anyways, and counters will be reset. */
	peer_device->device->recv_cnt += data_size>>9;

	bio = req->master_bio;
	D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);

	bio_for_each_segment(bvec, bio, iter) {
		void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
		expect = min_t(int, data_size, bvec.bv_len);
		err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
		kunmap(bvec.bv_page);
		if (err)
			return err;
		data_size -= expect;
	}

	if (dgs) {
		drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
			return -EINVAL;
		}
	}

	D_ASSERT(peer_device->device, data_size == 0);
	return 0;
}

/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}

static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;
	peer_req->submit_jif = jiffies;

	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}

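/* Editor's note: the block_id our peer echoes back in its reply is the
 * pointer value of the original struct drbd_request we sent out.
 * find_request() below does not blindly trust it: drbd_contains_interval()
 * verifies that exactly this interval is still present in the tree at the
 * given sector before the pointer is used. */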
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}

static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}

static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}

static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}

/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(device);
	}

	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}

static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	int err;

	err = drbd_send_ack(peer_device, ack, peer_req);
	dec_unacked(peer_device->device);

	return err;
}

static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}

static int e_send_retry_write(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_connection *connection = peer_req->peer_device->connection;

	return e_send_ack(w, connection->agreed_pro_version >= 100 ?
			     P_RETRY_WRITE : P_SUPERSEDED);
}

Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001996static bool seq_greater(u32 a, u32 b)
1997{
1998 /*
1999 * We assume 32-bit wrap-around here.
2000 * For 24-bit wrap-around, we would have to shift:
2001 * a <<= 8; b <<= 8;
2002 */
2003 return (s32)a - (s32)b > 0;
2004}
2005
2006static u32 seq_max(u32 a, u32 b)
2007{
2008 return seq_greater(a, b) ? a : b;
2009}
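
/*
 * A minimal user-space sketch of the wrap-around comparison above
 * (illustrative only, not part of the driver). Computing the difference
 * modulo 2^32 and interpreting it as signed makes sequence numbers just
 * past a wrap still compare as "greater".
 */
#if 0 /* standalone user-space demo */
#include <assert.h>
#include <stdint.h>

static int sketch_seq_greater(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

int main(void)
{
	assert(sketch_seq_greater(2, 1));            /* the ordinary case */
	assert(sketch_seq_greater(0, 0xffffffffu));  /* 0 follows 2^32 - 1 */
	assert(!sketch_seq_greater(0xffffffffu, 0)); /* but not vice versa */
	return 0;
}
#endif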
2010
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002011static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002012{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002013 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002014 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002015
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002016 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002017 spin_lock(&device->peer_seq_lock);
2018 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
2019 device->peer_seq = newest_peer_seq;
2020 spin_unlock(&device->peer_seq_lock);
2021 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01002022 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002023 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002024 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01002025}
2026
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002027static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
2028{
2029 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
2030}
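
/*
 * A user-space sketch of the overlap test above (illustrative only):
 * s1/s2 are start sectors, l1/l2 are lengths in bytes, so l >> 9 converts
 * bytes to 512-byte sectors. Two extents overlap unless one ends at or
 * before the start of the other.
 */
#if 0 /* standalone user-space demo */
#include <assert.h>

static int sketch_overlaps(unsigned long long s1, int l1,
			   unsigned long long s2, int l2)
{
	return !((s1 + (l1 >> 9) <= s2) || (s1 >= s2 + (l2 >> 9)));
}

int main(void)
{
	assert(sketch_overlaps(0, 4096, 4, 4096));  /* sectors 0-7 vs 4-11 */
	assert(!sketch_overlaps(0, 4096, 8, 4096)); /* sectors 0-7 vs 8-15 */
	return 0;
}
#endif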
2031
2032/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002033static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002034{
2035 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002036 bool rv = false;
2037
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002038 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002039 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002040 if (overlaps(peer_req->i.sector, peer_req->i.size,
2041 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002042 rv = true;
2043 break;
2044 }
2045 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002046 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002047
2048 return rv;
2049}
2050
Philipp Reisnerb411b362009-09-25 16:07:19 -07002051/* Called from receive_Data.
2052 * Synchronize packets on sock with packets on msock.
2053 *
2054 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2055 * packet traveling on msock, they are still processed in the order they have
2056 * been sent.
2057 *
2058 * Note: we don't care about Ack packets overtaking P_DATA packets.
2059 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002060 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002061 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002062 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002063 * ourselves. Correctly handles 32bit wrap around.
2064 *
2065 * Assume we have a 10 GBit connection, that is about 1<<30 bytes per second,
2066 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2067 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2068 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2069 *
2070 * returns 0 if we may process the packet,
2071 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002072static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002073{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002074 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002075 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002076 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002077 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002078
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002079 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002080 return 0;
2081
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002082 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002083 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002084 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2085 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002086 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002087 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002088
Philipp Reisnerb411b362009-09-25 16:07:19 -07002089 if (signal_pending(current)) {
2090 ret = -ERESTARTSYS;
2091 break;
2092 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002093
2094 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002095 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002096 rcu_read_unlock();
2097
2098 if (!tp)
2099 break;
2100
2101 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002102 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2103 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002104 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002105 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002106 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002107 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002108 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002109 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002110 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002111 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002112 break;
2113 }
2114 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002115 spin_unlock(&device->peer_seq_lock);
2116 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002117 return ret;
2118}
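
/*
 * A user-space sketch of the admission test in the loop above
 * (illustrative only): packet peer_seq may be processed once every packet
 * up to peer_seq - 1 has been seen, i.e. once peer_seq - 1 no longer
 * exceeds the locally known sequence number.
 */
#if 0 /* standalone user-space demo */
#include <assert.h>
#include <stdint.h>

static int sketch_seq_greater(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) > 0;
}

static int sketch_may_process(uint32_t peer_seq, uint32_t known_seq)
{
	return !sketch_seq_greater(peer_seq - 1, known_seq);
}

int main(void)
{
	assert(sketch_may_process(11, 10));  /* logically next: no waiting */
	assert(sketch_may_process(10, 10));  /* old/duplicate: no waiting */
	assert(!sketch_may_process(12, 10)); /* seq 11 still outstanding */
	assert(sketch_may_process(0, 0xffffffffu)); /* across the wrap */
	return 0;
}
#endif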
2119
Lars Ellenberg688593c2010-11-17 22:25:03 +01002120/* see also bio_flags_to_wire()
2121 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2122 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002123static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002124{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002125 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2126 (dpf & DP_FUA ? REQ_FUA : 0) |
2127 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2128 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002129}
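
/*
 * A user-space sketch of the flag translation pattern above and of the
 * inverse direction that bio_flags_to_wire() is expected to provide
 * (illustrative only; the SK_* bit values are placeholders, not the real
 * DP_* / REQ_* constants). The two maps must be inverses so that request
 * semantics survive the round trip to the peer and back.
 */
#if 0 /* standalone user-space demo */
#include <assert.h>

enum { SK_DP_RW_SYNC = 1u << 0, SK_DP_FUA = 1u << 1,
       SK_DP_FLUSH = 1u << 2, SK_DP_DISCARD = 1u << 3 };
enum { SK_REQ_SYNC = 1u << 0, SK_REQ_FUA = 1u << 1,
       SK_REQ_FLUSH = 1u << 2, SK_REQ_DISCARD = 1u << 3 };

static unsigned long sketch_wire_to_bio(unsigned int dpf)
{
	return (dpf & SK_DP_RW_SYNC ? SK_REQ_SYNC : 0) |
	       (dpf & SK_DP_FUA ? SK_REQ_FUA : 0) |
	       (dpf & SK_DP_FLUSH ? SK_REQ_FLUSH : 0) |
	       (dpf & SK_DP_DISCARD ? SK_REQ_DISCARD : 0);
}

static unsigned int sketch_bio_to_wire(unsigned long rq)
{
	return (rq & SK_REQ_SYNC ? SK_DP_RW_SYNC : 0) |
	       (rq & SK_REQ_FUA ? SK_DP_FUA : 0) |
	       (rq & SK_REQ_FLUSH ? SK_DP_FLUSH : 0) |
	       (rq & SK_REQ_DISCARD ? SK_DP_DISCARD : 0);
}

int main(void)
{
	unsigned int dpf = SK_DP_FUA | SK_DP_FLUSH;

	assert(sketch_bio_to_wire(sketch_wire_to_bio(dpf)) == dpf);
	return 0;
}
#endif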
2130
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002131static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002132 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002133{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002134 struct drbd_interval *i;
2135
2136 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002137 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002138 struct drbd_request *req;
2139 struct bio_and_error m;
2140
2141 if (!i->local)
2142 continue;
2143 req = container_of(i, struct drbd_request, i);
2144 if (!(req->rq_state & RQ_POSTPONED))
2145 continue;
2146 req->rq_state &= ~RQ_POSTPONED;
2147 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002148 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002149 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002150 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002151 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002152 goto repeat;
2153 }
2154}
2155
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002156static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002157 struct drbd_peer_request *peer_req)
2158{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002159 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002160 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002161 sector_t sector = peer_req->i.sector;
2162 const unsigned int size = peer_req->i.size;
2163 struct drbd_interval *i;
2164 bool equal;
2165 int err;
2166
2167 /*
2168 * Inserting the peer request into the write_requests tree will prevent
2169 * new conflicting local requests from being added.
2170 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002171 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002172
2173 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002174 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002175 if (i == &peer_req->i)
2176 continue;
Lars Ellenberg08d0dab2014-03-20 11:19:22 +01002177 if (i->completed)
2178 continue;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002179
2180 if (!i->local) {
2181 /*
2182 * Our peer has sent a conflicting remote request; this
2183 * should not happen in a two-node setup. Wait for the
2184 * earlier peer request to complete.
2185 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002186 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002187 if (err)
2188 goto out;
2189 goto repeat;
2190 }
2191
2192 equal = i->sector == sector && i->size == size;
2193 if (resolve_conflicts) {
2194 /*
2195 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002196 * overlapping request, it can be considered overwritten
2197 * and thus superseded; otherwise, it will be retried
2198 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002199 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002200 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002201 (i->size >> 9) >= sector + (size >> 9);
2202
2203 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002204 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002205 "local=%llus +%u, remote=%llus +%u, "
2206 "assuming %s came first\n",
2207 (unsigned long long)i->sector, i->size,
2208 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002209 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002210
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002211 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002212 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002213 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002214 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002215
2216 err = -ENOENT;
2217 goto out;
2218 } else {
2219 struct drbd_request *req =
2220 container_of(i, struct drbd_request, i);
2221
2222 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002223 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002224 "local=%llus +%u, remote=%llus +%u\n",
2225 (unsigned long long)i->sector, i->size,
2226 (unsigned long long)sector, size);
2227
2228 if (req->rq_state & RQ_LOCAL_PENDING ||
2229 !(req->rq_state & RQ_POSTPONED)) {
2230 /*
2231 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002232 * decide if this request has been superseded
2233 * or needs to be retried.
2234 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002235 * disappear from the write_requests tree.
2236 *
2237 * In addition, wait for the conflicting
2238 * request to finish locally before submitting
2239 * the conflicting peer request.
2240 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002241 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002242 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002243 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002244 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002245 goto out;
2246 }
2247 goto repeat;
2248 }
2249 /*
2250 * Remember to restart the conflicting requests after
2251 * the new peer request has completed.
2252 */
2253 peer_req->flags |= EE_RESTART_REQUESTS;
2254 }
2255 }
2256 err = 0;
2257
2258 out:
2259 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002260 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002261 return err;
2262}
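
/*
 * A user-space sketch of the containment test used above to decide
 * "superseded" (illustrative only): the peer request is superseded when it
 * lies fully inside the overlapping interval. Sectors are 512 bytes;
 * sizes are given in bytes, hence the >> 9.
 */
#if 0 /* standalone user-space demo */
#include <assert.h>

static int sketch_superseded(unsigned long long i_sector, unsigned int i_size,
			     unsigned long long sector, unsigned int size)
{
	return i_sector <= sector &&
	       i_sector + (i_size >> 9) >= sector + (size >> 9);
}

int main(void)
{
	assert(sketch_superseded(0, 8192, 4, 2048));  /* 0-15 contains 4-7 */
	assert(!sketch_superseded(4, 4096, 0, 8192)); /* 4-11 lacks 0-15 */
	return 0;
}
#endif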
2263
Philipp Reisnerb411b362009-09-25 16:07:19 -07002264/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002265static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002266{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002267 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002268 struct drbd_device *device;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002269 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002270 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002271 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002272 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002273 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002274 int rw = WRITE;
2275 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002276 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002277
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002278 peer_device = conn_peer_device(connection, pi->vnr);
2279 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002280 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002281 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002282
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002283 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002284 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002286 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2287 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002288 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002289 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002290 if (!err)
2291 err = err2;
2292 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002293 }
2294
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002295 /*
2296 * Corresponding put_ldev done either below (on various errors), or in
2297 * drbd_peer_request_endio, if we successfully submit the data at the
2298 * end of this function.
2299 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002300
2301 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002302 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002303 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002304 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002305 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002306 }
2307
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002308 peer_req->w.cb = e_end_block;
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002309 peer_req->submit_jif = jiffies;
2310 peer_req->flags |= EE_APPLICATION;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002311
Lars Ellenberg688593c2010-11-17 22:25:03 +01002312 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002313 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002314 if (pi->cmd == P_TRIM) {
2315 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2316 peer_req->flags |= EE_IS_TRIM;
2317 if (!blk_queue_discard(q))
2318 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2319 D_ASSERT(peer_device, peer_req->i.size > 0);
2320 D_ASSERT(peer_device, rw & REQ_DISCARD);
2321 D_ASSERT(peer_device, peer_req->pages == NULL);
2322 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002323 D_ASSERT(device, peer_req->i.size == 0);
2324 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002325 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002326
2327 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002328 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002329
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002330 spin_lock(&connection->epoch_lock);
2331 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002332 atomic_inc(&peer_req->epoch->epoch_size);
2333 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002334 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002335
Philipp Reisner302bdea2011-04-21 11:36:49 +02002336 rcu_read_lock();
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002337 nc = rcu_dereference(peer_device->connection->net_conf);
2338 tp = nc->two_primaries;
2339 if (peer_device->connection->agreed_pro_version < 100) {
2340 switch (nc->wire_protocol) {
2341 case DRBD_PROT_C:
2342 dp_flags |= DP_SEND_WRITE_ACK;
2343 break;
2344 case DRBD_PROT_B:
2345 dp_flags |= DP_SEND_RECEIVE_ACK;
2346 break;
2347 }
2348 }
Philipp Reisner302bdea2011-04-21 11:36:49 +02002349 rcu_read_unlock();
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002350
2351 if (dp_flags & DP_SEND_WRITE_ACK) {
2352 peer_req->flags |= EE_SEND_WRITE_ACK;
2353 inc_unacked(device);
2354 /* corresponding dec_unacked() in e_end_block()
2355 * respective _drbd_clear_done_ee */
2356 }
2357
2358 if (dp_flags & DP_SEND_RECEIVE_ACK) {
2359 /* I really don't like it that the receiver thread
2360 * sends on the msock, but anyway */
2361 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
2362 }
2363
Philipp Reisner302bdea2011-04-21 11:36:49 +02002364 if (tp) {
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002365 /* two primaries implies protocol C */
2366 D_ASSERT(device, dp_flags & DP_SEND_WRITE_ACK);
Philipp Reisner302bdea2011-04-21 11:36:49 +02002367 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002368 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002369 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002370 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002371 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002372 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002373 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002374 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002375 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002376 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002377 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002378 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002379 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002380 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002381 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002382 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002383 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002384 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002385 /* if we use the zeroout fallback code, we process synchronously
2386 * and we wait for all pending requests, that is, we wait for
2387 * active_ee to become empty in drbd_submit_peer_request();
2388 * better not add ourselves here. */
2389 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
Lars Ellenbergb9ed7082014-04-23 12:15:35 +02002390 list_add_tail(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002391 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002392
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002393 if (device->state.conn == C_SYNC_TARGET)
2394 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002395
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002396 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002397 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002398 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002399 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002400 drbd_al_begin_io(device, &peer_req->i);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002401 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002402 }
2403
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002404 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002405 if (!err)
2406 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002407
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002408 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002409 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002410 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002411 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002412 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002413 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002414 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO) {
2415 peer_req->flags &= ~EE_CALL_AL_COMPLETE_IO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002416 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002417 }
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002418
Philipp Reisnerb411b362009-09-25 16:07:19 -07002419out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002420 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002421 put_ldev(device);
2422 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002423 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002424}
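
/*
 * A user-space sketch of the ack-mode compatibility logic above
 * (illustrative only; enum values are placeholders): peers speaking a
 * protocol version before 100 do not set the ack bits in dp_flags
 * themselves, so the receiver derives them from the configured wire
 * protocol (C: write ack, B: receive ack, A: none).
 */
#if 0 /* standalone user-space demo */
#include <assert.h>

enum sketch_proto { SK_PROT_A, SK_PROT_B, SK_PROT_C };
enum { SK_DP_SEND_RECEIVE_ACK = 1u << 0, SK_DP_SEND_WRITE_ACK = 1u << 1 };

static unsigned int sketch_ack_flags(unsigned int dp_flags,
				     int agreed_pro_version,
				     enum sketch_proto wire_protocol)
{
	if (agreed_pro_version < 100) {
		switch (wire_protocol) {
		case SK_PROT_C: dp_flags |= SK_DP_SEND_WRITE_ACK; break;
		case SK_PROT_B: dp_flags |= SK_DP_SEND_RECEIVE_ACK; break;
		case SK_PROT_A: break;
		}
	}
	return dp_flags;
}

int main(void)
{
	assert(sketch_ack_flags(0, 99, SK_PROT_C) == SK_DP_SEND_WRITE_ACK);
	assert(sketch_ack_flags(0, 101, SK_PROT_C) == 0); /* peer already set bits */
	return 0;
}
#endif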
2425
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002426/* We may throttle resync, if the lower device seems to be busy,
2427 * and current sync rate is above c_min_rate.
2428 *
2429 * To decide whether or not the lower device is busy, we use a scheme similar
2430 * to MD RAID's is_mddev_idle(): if the partition stats reveal "significant"
2431 * activity (more than 64 sectors) that we cannot account for with our own
2432 * resync activity, the device obviously is "busy".
2433 *
2434 * The current sync rate used here uses only the most recent two step marks,
2435 * to have a short time average so we can react faster.
2436 */
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002437bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector,
2438 bool throttle_if_app_is_waiting)
Lars Ellenberge8299872014-04-28 18:43:19 +02002439{
2440 struct lc_element *tmp;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002441 bool throttle = drbd_rs_c_min_rate_throttle(device);
Lars Ellenberge8299872014-04-28 18:43:19 +02002442
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002443 if (!throttle || throttle_if_app_is_waiting)
2444 return throttle;
Lars Ellenberge8299872014-04-28 18:43:19 +02002445
2446 spin_lock_irq(&device->al_lock);
2447 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2448 if (tmp) {
2449 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2450 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2451 throttle = false;
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002452 /* Do not slow down if app IO is already waiting for this extent,
2453 * and our progress is necessary for application IO to complete. */
Lars Ellenberge8299872014-04-28 18:43:19 +02002454 }
2455 spin_unlock_irq(&device->al_lock);
2456
2457 return throttle;
2458}
2459
2460bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002461{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002462 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002463 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002464 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002465 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002466
2467 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002468 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002469 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002470
2471 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002472 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002473 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002474
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002475 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2476 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002477 atomic_read(&device->rs_sect_ev);
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002478
2479 if (atomic_read(&device->ap_actlog_cnt)
2480 || !device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002481 unsigned long rs_left;
2482 int i;
2483
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002484 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002485
2486 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2487 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002488 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002489
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002490 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2491 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002492 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002493 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002494
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002495 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002496 if (!dt)
2497 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002498 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002499 dbdt = Bit2KB(db/dt);
2500
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002501 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002502 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002503 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002504 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002505}
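
/*
 * A user-space sketch of the rate estimate above (illustrative only,
 * assuming the usual 4 KiB of data per bitmap bit, i.e. Bit2KB(x) == x << 2):
 * db bitmap bits cleared over dt seconds give a resync rate in KiB/s,
 * which is compared against the configured c_min_rate.
 */
#if 0 /* standalone user-space demo */
#include <assert.h>

#define SKETCH_BIT2KB(bits) ((bits) << 2) /* assumed 4 KiB per bitmap bit */

static int sketch_above_c_min_rate(unsigned long db, unsigned long dt,
				   unsigned int c_min_rate)
{
	if (!dt)
		dt++; /* same zero-division guard as above */
	return SKETCH_BIT2KB(db / dt) > c_min_rate;
}

int main(void)
{
	/* 60000 bits in 10 s -> 6000 bits/s -> 24000 KiB/s > 20000 KiB/s */
	assert(sketch_above_c_min_rate(60000, 10, 20000));
	assert(!sketch_above_c_min_rate(1000, 10, 20000));
	return 0;
}
#endif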
2506
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002507static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002508{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002509 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002510 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002511 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002512 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002513 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002514 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002515 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002516 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002517 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002518
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002519 peer_device = conn_peer_device(connection, pi->vnr);
2520 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002521 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002522 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002523 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002524
2525 sector = be64_to_cpu(p->sector);
2526 size = be32_to_cpu(p->blksize);
2527
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002528 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002529 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002530 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002531 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002532 }
2533 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002534 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002535 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002536 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002537 }
2538
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002539 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002540 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002541 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002542 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002543 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002544 break;
2545 case P_RS_DATA_REQUEST:
2546 case P_CSUM_RS_REQUEST:
2547 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002548 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002549 break;
2550 case P_OV_REPLY:
2551 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002552 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002553 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002554 break;
2555 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002556 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002557 }
2558 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002559 drbd_err(device, "Cannot satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002560 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002561
Lars Ellenberga821cc42010-09-06 12:31:37 +02002562 /* drain possible payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002563 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002564 }
2565
2566 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2567 * "criss-cross" setup, that might cause write-out on some other DRBD,
2568 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002569 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2570 true /* has real payload */, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002571 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002572 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002573 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002574 }
2575
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002576 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002577 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002578 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002579 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002580 /* application IO, don't drbd_rs_begin_io */
Lars Ellenberg21ae5d72014-05-05 23:42:24 +02002581 peer_req->flags |= EE_APPLICATION;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002582 goto submit;
2583
Philipp Reisnerb411b362009-09-25 16:07:19 -07002584 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002585 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002586 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002587 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002588 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002589 break;
2590
2591 case P_OV_REPLY:
2592 case P_CSUM_RS_REQUEST:
2593 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002594 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002595 if (!di)
2596 goto out_free_e;
2597
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002598 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002599 di->digest = (((char *)di)+sizeof(struct digest_info));
2600
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002601 peer_req->digest = di;
2602 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002603
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002604 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002605 goto out_free_e;
2606
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002607 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002608 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002609 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002610 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002611 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Lars Ellenbergaaaba342014-03-18 12:30:09 +01002612 /* remember to report stats in drbd_resync_finished */
2613 device->use_csums = true;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002614 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002615 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002616 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002617 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002618 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002619 /* drbd_rs_begin_io done when we sent this request,
2620 * but accounting still needs to be done. */
2621 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002622 }
2623 break;
2624
2625 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002626 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002627 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002628 unsigned long now = jiffies;
2629 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002630 device->ov_start_sector = sector;
2631 device->ov_position = sector;
2632 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2633 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002634 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002635 device->rs_mark_left[i] = device->ov_left;
2636 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002637 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002638 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002639 (unsigned long long)sector);
2640 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002641 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002642 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002643 break;
2644
Philipp Reisnerb411b362009-09-25 16:07:19 -07002645 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002646 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002647 }
2648
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002649 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2650 * wrt the receiver, but it is not as straightforward as it may seem.
2651 * Various places in the resync start and stop logic assume resync
2652 * requests are processed in order; requeuing this on the worker thread
2653 * introduces a bunch of new code for synchronization between threads.
2654 *
2655 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2656 * "forever", throttling after drbd_rs_begin_io will lock that extent
2657 * for application writes for the same time. For now, just throttle
2658 * here, where the rest of the code expects the receiver to sleep for
2659 * a while, anyways.
2660 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002661
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002662 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2663 * this defers syncer requests for some time, before letting at least
2664 * one request through. The resync controller on the receiving side
2665 * will adapt to the incoming rate accordingly.
2666 *
2667 * We cannot throttle here if remote is Primary/SyncTarget:
2668 * we would also throttle its application reads.
2669 * In that case, throttling is done on the SyncTarget only.
2670 */
Lars Ellenbergad3fee72013-12-20 11:22:13 +01002671 if (device->state.peer != R_PRIMARY
2672 && drbd_rs_should_slow_down(device, sector, false))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002673 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002674 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002675 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002676
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002677submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002678 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002679
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002680submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002681 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002682 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002683 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002684 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002685
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002686 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002687 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002688
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002689 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002690 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002691 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002692 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002693 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002694 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2695
Philipp Reisnerb411b362009-09-25 16:07:19 -07002696out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002697 put_ldev(device);
2698 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002699 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002700}
2701
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002702/**
2703 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2704 */
2705static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002706{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002707 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002708 int self, peer, rv = -100;
2709 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002710 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002711
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002712 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2713 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002714
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002715 ch_peer = device->p_uuid[UI_SIZE];
2716 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002717
Philipp Reisner44ed1672011-04-19 17:10:19 +02002718 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002719 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002720 rcu_read_unlock();
2721 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002722 case ASB_CONSENSUS:
2723 case ASB_DISCARD_SECONDARY:
2724 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002725 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002726 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002727 break;
2728 case ASB_DISCONNECT:
2729 break;
2730 case ASB_DISCARD_YOUNGER_PRI:
2731 if (self == 0 && peer == 1) {
2732 rv = -1;
2733 break;
2734 }
2735 if (self == 1 && peer == 0) {
2736 rv = 1;
2737 break;
2738 }
2739 /* Else fall through to one of the other strategies... */
2740 case ASB_DISCARD_OLDER_PRI:
2741 if (self == 0 && peer == 1) {
2742 rv = 1;
2743 break;
2744 }
2745 if (self == 1 && peer == 0) {
2746 rv = -1;
2747 break;
2748 }
2749 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002750 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002751 "Using discard-least-changes instead\n");
2752 case ASB_DISCARD_ZERO_CHG:
2753 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002754 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002755 ? -1 : 1;
2756 break;
2757 } else {
2758 if (ch_peer == 0) { rv = 1; break; }
2759 if (ch_self == 0) { rv = -1; break; }
2760 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002761 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002762 break;
2763 case ASB_DISCARD_LEAST_CHG:
2764 if (ch_self < ch_peer)
2765 rv = -1;
2766 else if (ch_self > ch_peer)
2767 rv = 1;
2768 else /* ( ch_self == ch_peer ) */
2769 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002770 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771 ? -1 : 1;
2772 break;
2773 case ASB_DISCARD_LOCAL:
2774 rv = -1;
2775 break;
2776 case ASB_DISCARD_REMOTE:
2777 rv = 1;
2778 }
2779
2780 return rv;
2781}
2782
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002783/**
2784 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2785 */
2786static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002788 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002789 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002790 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791
Philipp Reisner44ed1672011-04-19 17:10:19 +02002792 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002793 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002794 rcu_read_unlock();
2795 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796 case ASB_DISCARD_YOUNGER_PRI:
2797 case ASB_DISCARD_OLDER_PRI:
2798 case ASB_DISCARD_LEAST_CHG:
2799 case ASB_DISCARD_LOCAL:
2800 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002801 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002802 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803 break;
2804 case ASB_DISCONNECT:
2805 break;
2806 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002807 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002808 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002809 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002810 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002811 rv = hg;
2812 break;
2813 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002814 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815 break;
2816 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002817 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002818 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002819 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002820 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002821 enum drbd_state_rv rv2;
2822
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2824 * we might be here in C_WF_REPORT_PARAMS which is transient.
2825 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002826 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002827 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002828 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002829 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002830 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002831 rv = hg;
2832 }
2833 } else
2834 rv = hg;
2835 }
2836
2837 return rv;
2838}
2839
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002840/**
2841 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2842 */
2843static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002845 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002846 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002847 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002848
Philipp Reisner44ed1672011-04-19 17:10:19 +02002849 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002850 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002851 rcu_read_unlock();
2852 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002853 case ASB_DISCARD_YOUNGER_PRI:
2854 case ASB_DISCARD_OLDER_PRI:
2855 case ASB_DISCARD_LEAST_CHG:
2856 case ASB_DISCARD_LOCAL:
2857 case ASB_DISCARD_REMOTE:
2858 case ASB_CONSENSUS:
2859 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002860 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002861 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002862 break;
2863 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002864 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002865 break;
2866 case ASB_DISCONNECT:
2867 break;
2868 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002869 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002870 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002871 enum drbd_state_rv rv2;
2872
Philipp Reisnerb411b362009-09-25 16:07:19 -07002873 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2874 * we might be here in C_WF_REPORT_PARAMS which is transient.
2875 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002876 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002877 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002878 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002879 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002880 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002881 rv = hg;
2882 }
2883 } else
2884 rv = hg;
2885 }
2886
2887 return rv;
2888}
2889
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002890static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002891 u64 bits, u64 flags)
2892{
2893 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002894 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002895 return;
2896 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002897 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002898 text,
2899 (unsigned long long)uuid[UI_CURRENT],
2900 (unsigned long long)uuid[UI_BITMAP],
2901 (unsigned long long)uuid[UI_HISTORY_START],
2902 (unsigned long long)uuid[UI_HISTORY_END],
2903 (unsigned long long)bits,
2904 (unsigned long long)flags);
2905}
2906
2907/*
2908 100 after split brain try auto recover
2909 2 C_SYNC_SOURCE set BitMap
2910 1 C_SYNC_SOURCE use BitMap
2911 0 no Sync
2912 -1 C_SYNC_TARGET use BitMap
2913 -2 C_SYNC_TARGET set BitMap
2914 -100 after split brain, disconnect
2915-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002916-1091 requires proto 91
2917-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002918 */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002919static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002920{
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002921 struct drbd_peer_device *const peer_device = first_peer_device(device);
2922 struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923 u64 self, peer;
2924 int i, j;
2925
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002926 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2927 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002928
2929 *rule_nr = 10;
2930 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2931 return 0;
2932
2933 *rule_nr = 20;
2934 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2935 peer != UUID_JUST_CREATED)
2936 return -2;
2937
2938 *rule_nr = 30;
2939 if (self != UUID_JUST_CREATED &&
2940 (peer == UUID_JUST_CREATED || peer == (u64)0))
2941 return 2;
2942
2943 if (self == peer) {
2944 int rct, dc; /* roles at crash time */
2945
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002946 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002947
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002948 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002949 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002950
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002951 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2952 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002953 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002954 drbd_uuid_move_history(device);
2955 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2956 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002957
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002958 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2959 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002960 *rule_nr = 34;
2961 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002962 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002963 *rule_nr = 36;
2964 }
2965
2966 return 1;
2967 }
2968
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002969 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002970
Lars Ellenberg44a4d552013-11-22 12:40:58 +01002971 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002972 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002973
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002974 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2975 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002976 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002978 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2979 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2980 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002981
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002982 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002983 *rule_nr = 35;
2984 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002985 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002986 *rule_nr = 37;
2987 }
2988
2989 return -1;
2990 }
2991
2992 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002993 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2994 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995 /* lowest bit is set when we were primary,
2996 * next bit (weight 2) is set when peer was primary */
2997 *rule_nr = 40;
2998
2999 switch (rct) {
3000 case 0: /* !self_pri && !peer_pri */ return 0;
3001 case 1: /* self_pri && !peer_pri */ return 1;
3002 case 2: /* !self_pri && peer_pri */ return -1;
3003 case 3: /* self_pri && peer_pri */
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003004 dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003005 return dc ? -1 : 1;
3006 }
3007 }
3008
3009 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003010 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011 if (self == peer)
3012 return -1;
3013
3014 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003015 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003016 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003017 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003018 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
3019 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
3020 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003021 /* The last P_SYNC_UUID did not get through. Undo the modifications
3022 of the peer's UUIDs from the last start of resync as sync source. */
3023
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003024 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003025 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003026
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003027 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
3028 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01003029
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003030 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003031 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01003032
Philipp Reisnerb411b362009-09-25 16:07:19 -07003033 return -1;
3034 }
3035 }
3036
3037 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003038 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003039 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003040 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003041 if (self == peer)
3042 return -2;
3043 }
3044
3045 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003046 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3047 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003048 if (self == peer)
3049 return 1;
3050
3051 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003052 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053 if (self == peer) {
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003054 if (connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003055 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
3056 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
3057 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003058 /* The last P_SYNC_UUID did not get through. Undo the modifications
3059 of our UUIDs from the last start of resync as sync source. */
3060
Lars Ellenberg44a4d552013-11-22 12:40:58 +01003061 if (connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01003062 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003063
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003064 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
3065 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003067 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003068 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
3069 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003070
3071 return 1;
3072 }
3073 }
3074
3075
3076 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003077 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003078 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003079 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003080 if (self == peer)
3081 return 2;
3082 }
3083
3084 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003085 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3086 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003087 if (self == peer && self != ((u64)0))
3088 return 100;
3089
3090 *rule_nr = 100;
3091 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003092 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003094 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 if (self == peer)
3096 return -100;
3097 }
3098 }
3099
3100 return -1000;
3101}
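/*
 * Illustration only, not part of the driver: one way a caller could
 * decode the hg value encoded per the table above drbd_uuid_compare().
 * The helper name is made up for this sketch; drbd_sync_handshake()
 * below does the real decoding inline.
 */
static __maybe_unused const char *uuid_compare_verdict(int hg)
{
	if (hg == -1000)
		return "unrelated data";
	if (hg < -1000)			/* hg == -(1000 + required protocol) */
		return "needs a newer protocol on both sides";
	if (hg == 100 || hg == -100)
		return "split brain";
	if (hg == 0)
		return "no sync needed";
	if (abs(hg) == 2)
		return hg > 0 ? "sync source, set bitmap (full sync)"
			      : "sync target, set bitmap (full sync)";
	return hg > 0 ? "sync source, use bitmap"
		      : "sync target, use bitmap";
}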
3102
3103/* drbd_sync_handshake() returns the new conn state on success, or
3104 C_MASK (-1) on failure.
3105 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003106static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3107 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003108 enum drbd_disk_state peer_disk) __must_hold(local)
3109{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003110 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003111 enum drbd_conns rv = C_MASK;
3112 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003113 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003114 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003115
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003116 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003117 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003118 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003120 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003121
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003122 spin_lock_irq(&device->ldev->md.uuid_lock);
3123 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3124 drbd_uuid_dump(device, "peer", device->p_uuid,
3125 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003127 hg = drbd_uuid_compare(device, &rule_nr);
3128 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003129
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003130 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131
3132 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003133 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003134 return C_MASK;
3135 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003136 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003137 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003138 return C_MASK;
3139 }
3140
3141 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3142 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3143 int f = (hg == -100) || abs(hg) == 2;
3144 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3145 if (f)
3146 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003147 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148 hg > 0 ? "source" : "target");
3149 }
3150
Adam Gandelman3a11a482010-04-08 16:48:23 -07003151 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003152 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003153
Philipp Reisner44ed1672011-04-19 17:10:19 +02003154 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003155 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003156
3157 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003158 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159 + (peer_role == R_PRIMARY);
3160 int forced = (hg == -100);
3161
3162 switch (pcount) {
3163 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003164 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003165 break;
3166 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003167 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003168 break;
3169 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003170 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003171 break;
3172 }
3173 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003174 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003175 "automatically solved. Sync from %s node\n",
3176 pcount, (hg < 0) ? "peer" : "this");
3177 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003178 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003179 " UUIDs were ambiguous.\n");
3180 hg = hg*2;
3181 }
3182 }
3183 }
3184
3185 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003186 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003187 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003188 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003189 hg = 1;
3190
3191 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003192 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003193 "Sync from %s node\n",
3194 (hg < 0) ? "peer" : "this");
3195 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003196 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003197 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003198 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003199
3200 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003201 /* FIXME this log message is not correct if we end up here
3202 * after an attempted attach on a diskless node.
3203 * We just refuse to attach -- well, we drop the "connection"
3204 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003205 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003206 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003207 return C_MASK;
3208 }
3209
3210 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003211 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003212 return C_MASK;
3213 }
3214
3215 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003216 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003217 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003218 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003219 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003220 /* fall through */
3221 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003222 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003223 return C_MASK;
3224 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003225 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226 " assumption\n");
3227 }
3228 }
3229
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003230 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003231 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003232 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003233 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003234 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003235 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3236 abs(hg) >= 2 ? "full" : "bit-map based");
3237 return C_MASK;
3238 }
3239
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003241 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003242 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003243 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003244 return C_MASK;
3245 }
3246
3247 if (hg > 0) { /* become sync source. */
3248 rv = C_WF_BITMAP_S;
3249 } else if (hg < 0) { /* become sync target */
3250 rv = C_WF_BITMAP_T;
3251 } else {
3252 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003253 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003254 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003255 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003256 }
3257 }
3258
3259 return rv;
3260}
3261
Philipp Reisnerf179d762011-05-16 17:31:47 +02003262static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003263{
3264 /* the pairing ASB_DISCARD_REMOTE <-> ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003265 if (peer == ASB_DISCARD_REMOTE)
3266 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003267
3268 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003269 if (peer == ASB_DISCARD_LOCAL)
3270 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003271
3272 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003273 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003274}
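/*
 * Sketch, not in the driver: the conversion above is its own inverse,
 * which is what lets receive_protocol() below compare the peer's
 * (converted) policies directly against our own net_conf.  A
 * self-check could look like this:
 */
static void __maybe_unused convert_after_sb_selftest(void)
{
	/* the only two values that are actually swapped */
	WARN_ON(convert_after_sb(ASB_DISCARD_REMOTE) != ASB_DISCARD_LOCAL);
	WARN_ON(convert_after_sb(ASB_DISCARD_LOCAL) != ASB_DISCARD_REMOTE);
	/* applying the conversion twice is the identity */
	WARN_ON(convert_after_sb(convert_after_sb(ASB_DISCARD_REMOTE))
		!= ASB_DISCARD_REMOTE);
}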
3275
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003276static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003277{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003278 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003279 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3280 int p_proto, p_discard_my_data, p_two_primaries, cf;
3281 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3282 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003283 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003284 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003285
Philipp Reisnerb411b362009-09-25 16:07:19 -07003286 p_proto = be32_to_cpu(p->protocol);
3287 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3288 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3289 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003290 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003291 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003292 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003293
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003294 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003295 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003296
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003297 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003298 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003299 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003300 if (err)
3301 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003302 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003303 }
3304
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003305 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003306 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003307
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003308 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003309 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003310
3311 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003312 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003313
3314 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003315 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003316 goto disconnect_rcu_unlock;
3317 }
3318
3319 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003320 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003321 goto disconnect_rcu_unlock;
3322 }
3323
3324 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003325 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003326 goto disconnect_rcu_unlock;
3327 }
3328
3329 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003330 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003331 goto disconnect_rcu_unlock;
3332 }
3333
3334 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003335 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003336 goto disconnect_rcu_unlock;
3337 }
3338
3339 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003340 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003341 goto disconnect_rcu_unlock;
3342 }
3343
3344 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003345 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003346 goto disconnect_rcu_unlock;
3347 }
3348
3349 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003350 }
3351
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003352 if (integrity_alg[0]) {
3353 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003354
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003355 /*
3356 * We can only change the peer data integrity algorithm
3357 * here. Changing our own data integrity algorithm
3358 * requires that we send a P_PROTOCOL_UPDATE packet at
3359 * the same time; otherwise, the peer cannot tell
3360 * between which two packets the algorithm is
3361 * supposed to change.
3362 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003363
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003364 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3365 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003366 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003367 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003368 goto disconnect;
3369 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003370
3371 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3372 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3373 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3374 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003375 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003376 goto disconnect;
3377 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003378 }
3379
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003380 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3381 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003382 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003383 goto disconnect;
3384 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003385
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003386 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003387 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003388 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003389 *new_net_conf = *old_net_conf;
3390
3391 new_net_conf->wire_protocol = p_proto;
3392 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3393 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3394 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3395 new_net_conf->two_primaries = p_two_primaries;
3396
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003397 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003398 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003399 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003400
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003401 crypto_free_hash(connection->peer_integrity_tfm);
3402 kfree(connection->int_dig_in);
3403 kfree(connection->int_dig_vv);
3404 connection->peer_integrity_tfm = peer_integrity_tfm;
3405 connection->int_dig_in = int_dig_in;
3406 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003407
3408 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003409 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003410 integrity_alg[0] ? integrity_alg : "(none)");
3411
3412 synchronize_rcu();
3413 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003414 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415
Philipp Reisner44ed1672011-04-19 17:10:19 +02003416disconnect_rcu_unlock:
3417 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003418disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003419 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003420 kfree(int_dig_in);
3421 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003422 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003423 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003424}
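/*
 * Condensed sketch (illustration only) of the RCU copy-update pattern
 * used above for connection->net_conf: allocate a copy, modify the
 * copy, publish it, then wait out all readers before freeing the old
 * one.  Error handling and the unrelated fields are omitted here.
 */
static int __maybe_unused net_conf_update_sketch(struct drbd_connection *connection,
						 int wire_protocol)
{
	struct net_conf *old_net_conf, *new_net_conf;

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf)
		return -ENOMEM;

	mutex_lock(&connection->data.mutex);		/* no concurrent senders */
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;			/* copy, ... */
	new_net_conf->wire_protocol = wire_protocol;	/* ... modify the copy, ... */
	rcu_assign_pointer(connection->net_conf, new_net_conf);	/* ... publish */
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	synchronize_rcu();	/* until no rcu_dereference() reader can see it */
	kfree(old_net_conf);
	return 0;
}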
3425
3426/* helper function
3427 * input: alg name, feature name
3428 * return: NULL if the alg name was "",
3429 * ERR_PTR(error) if something went wrong,
3430 * or the crypto hash ptr if it worked out ok. */
Lars Ellenberg8ce953a2014-02-27 09:46:18 +01003431static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003432 const char *alg, const char *name)
3433{
3434 struct crypto_hash *tfm;
3435
3436 if (!alg[0])
3437 return NULL;
3438
3439 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3440 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003441 drbd_err(device, "Cannot allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003442 alg, name, PTR_ERR(tfm));
3443 return tfm;
3444 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003445 return tfm;
3446}
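/*
 * Illustration only: the helper above returns one of three things, and
 * callers must distinguish all of them.  NULL means "no algorithm
 * configured" (not an error), IS_ERR() marks an allocation failure
 * that was already logged, and anything else is a usable tfm.  The
 * function name below is made up for this sketch.
 */
static int __maybe_unused digest_safe_caller_sketch(struct drbd_device *device,
						    const char *alg)
{
	struct crypto_hash *tfm;

	tfm = drbd_crypto_alloc_digest_safe(device, alg, "verify-alg");
	if (!tfm)
		return 0;		/* empty alg name: feature disabled */
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);	/* failure, reason already logged */
	crypto_free_hash(tfm);		/* a real caller would keep and use it */
	return 0;
}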
3447
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003448static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003449{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003450 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003451 int size = pi->size;
3452
3453 while (size) {
3454 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003455 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003456 if (s <= 0) {
3457 if (s < 0)
3458 return s;
3459 break;
3460 }
3461 size -= s;
3462 }
3463 if (size)
3464 return -EIO;
3465 return 0;
3466}
3467
3468/*
3469 * config_unknown_volume - device configuration command for unknown volume
3470 *
3471 * When a device is added to an existing connection, the node on which the
3472 * device is added first will send configuration commands to its peer but the
3473 * peer will not know about the device yet. It will warn and ignore these
3474 * commands. Once the device is added on the second node, the second node will
3475 * send the same device configuration commands, but in the other direction.
3476 *
3477 * (We can also end up here if drbd is misconfigured.)
3478 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003479static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003480{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003481 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003482 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003483 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003484}
3485
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003486static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003487{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003488 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003489 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003490 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003491 unsigned int header_size, data_size, exp_max_sz;
3492 struct crypto_hash *verify_tfm = NULL;
3493 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003494 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003495 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003496 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003497 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003498 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003499 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003500
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003501 peer_device = conn_peer_device(connection, pi->vnr);
3502 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003503 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003504 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003505
3506 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3507 : apv == 88 ? sizeof(struct p_rs_param)
3508 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003509 : apv <= 94 ? sizeof(struct p_rs_param_89)
3510 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003511
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003512 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003513 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003514 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003515 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516 }
3517
3518 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003519 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003520 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003521 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003522 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003523 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003524 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003525 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003526 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003527 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003528 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529 }
3530
3531 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003532 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003533 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3534
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003535 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003536 if (err)
3537 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003538
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003539 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003540 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003541 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003542 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3543 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003544 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003545 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003546 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003547 return -ENOMEM;
3548 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003549
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003550 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003551 *new_disk_conf = *old_disk_conf;
3552
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003553 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003554 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003555
3556 if (apv >= 88) {
3557 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003558 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003559 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003560 "peer wants %u, accepting only up to %u bytes\n",
3561 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003562 err = -EIO;
3563 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003564 }
3565
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003566 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003567 if (err)
3568 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003569 /* we expect NUL terminated string */
3570 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003571 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572 p->verify_alg[data_size-1] = 0;
3573
3574 } else /* apv >= 89 */ {
3575 /* we still expect NUL terminated strings */
3576 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003577 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3578 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003579 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3580 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3581 }
3582
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003583 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003584 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003585 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003586 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587 goto disconnect;
3588 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003589 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003590 p->verify_alg, "verify-alg");
3591 if (IS_ERR(verify_tfm)) {
3592 verify_tfm = NULL;
3593 goto disconnect;
3594 }
3595 }
3596
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003597 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003598 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003599 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003600 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003601 goto disconnect;
3602 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003603 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003604 p->csums_alg, "csums-alg");
3605 if (IS_ERR(csums_tfm)) {
3606 csums_tfm = NULL;
3607 goto disconnect;
3608 }
3609 }
3610
Philipp Reisner813472c2011-05-03 16:47:02 +02003611 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003612 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3613 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3614 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3615 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003616
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003617 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003618 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003619 new_plan = fifo_alloc(fifo_size);
3620 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003621 drbd_err(device, "kmalloc of fifo_buffer failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003622 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003623 goto disconnect;
3624 }
3625 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003626 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003627
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003628 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003629 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3630 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003631 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003632 goto disconnect;
3633 }
3634
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003635 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003636
3637 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003638 strcpy(new_net_conf->verify_alg, p->verify_alg);
3639 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003640 crypto_free_hash(peer_device->connection->verify_tfm);
3641 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003642 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003643 }
3644 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003645 strcpy(new_net_conf->csums_alg, p->csums_alg);
3646 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003647 crypto_free_hash(peer_device->connection->csums_tfm);
3648 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003649 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003650 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003651 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003652 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653 }
3654
Philipp Reisner813472c2011-05-03 16:47:02 +02003655 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003656 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3657 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003658 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003659
3660 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003661 old_plan = device->rs_plan_s;
3662 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003663 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003664
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003665 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003666 synchronize_rcu();
3667 if (new_net_conf)
3668 kfree(old_net_conf);
3669 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003670 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003671
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003672 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673
Philipp Reisner813472c2011-05-03 16:47:02 +02003674reconnect:
3675 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003676 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003677 kfree(new_disk_conf);
3678 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003679 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003680 return -EIO;
3681
Philipp Reisnerb411b362009-09-25 16:07:19 -07003682disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003683 kfree(new_plan);
3684 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003685 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003686 kfree(new_disk_conf);
3687 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003688 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003689 /* just for completeness: actually not needed,
3690 * as this is not reached if csums_tfm was ok. */
3691 crypto_free_hash(csums_tfm);
3692 /* but free the verify_tfm again, if csums_tfm did not work out */
3693 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003694 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003695 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696}
3697
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003699static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003700 const char *s, sector_t a, sector_t b)
3701{
3702 sector_t d;
3703 if (a == 0 || b == 0)
3704 return;
3705 d = (a > b) ? (a - b) : (b - a);
3706 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003707 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003708 (unsigned long long)a, (unsigned long long)b);
3709}
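/*
 * Worked example for the shift arithmetic above (illustration only):
 * a >> 3 is a/8, i.e. 12.5% of a.  With a = 1000 and b = 880 sectors,
 * d = 120; a >> 3 = 125, so d > (a>>3) is false, but b >> 3 = 110, so
 * d > (b>>3) is true and the difference is reported.
 */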
3710
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003711static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003712{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003713 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003714 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003715 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003716 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003717 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003718 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003719 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003721 peer_device = conn_peer_device(connection, pi->vnr);
3722 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003723 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003724 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003725
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726 p_size = be64_to_cpu(p->d_size);
3727 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003728 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003729
Philipp Reisnerb411b362009-09-25 16:07:19 -07003730 /* just store the peer's disk size for now.
3731 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003732 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003734 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003735 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003736 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003737 rcu_read_unlock();
3738
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003739 warn_if_differ_considerably(device, "lower level device sizes",
3740 p_size, drbd_get_max_capacity(device->ldev));
3741 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003742 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003743
3744 /* if this is the first connect, or an otherwise expected
3745 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003746 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003747 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003748
3749 /* Never shrink a device with usable data during connect.
3750 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003751 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3752 drbd_get_capacity(device->this_bdev) &&
3753 device->state.disk >= D_OUTDATED &&
3754 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003755 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003756 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003757 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003758 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003759 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003760
3761 if (my_usize != p_usize) {
3762 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3763
3764 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3765 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003766 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003767 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003768 return -ENOMEM;
3769 }
3770
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003771 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003772 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003773 *new_disk_conf = *old_disk_conf;
3774 new_disk_conf->disk_size = p_usize;
3775
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003776 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003777 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003778 synchronize_rcu();
3779 kfree(old_disk_conf);
3780
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003781 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003782 (unsigned long)p_usize);
3783 }
3784
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003785 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003786 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003787
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003788 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003789 /* Keep drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3790 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3791 drbd_reconsider_max_bio_size(), we can be sure that after
3792 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3793
Philipp Reisnere89b5912010-03-24 17:11:33 +01003794 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003795 if (get_ldev(device)) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003796 drbd_reconsider_max_bio_size(device, device->ldev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003797 dd = drbd_determine_dev_size(device, ddsf, NULL);
3798 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003799 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003800 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003801 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003803 /*
3804 * I am diskless, need to accept the peer's *current* size.
3805 * I must NOT accept the peer's backing disk size,
3806 * it may have been larger than mine all along...
3807 *
3808 * At this point, the peer knows more about my disk, or at
3809 * least about what we last agreed upon, than myself.
3810 * So if his c_size is less than his d_size, the most likely
3811 * reason is that *my* d_size was smaller last time we checked.
3812 *
3813 * However, if he sends a zero current size,
3814 * take his (user-capped or) backing disk size anyway (see the size-pick sketch after this function).
3815 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003816 drbd_reconsider_max_bio_size(device, NULL);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003817 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818 }
3819
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003820 if (get_ldev(device)) {
3821 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3822 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003823 ldsc = 1;
3824 }
3825
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003826 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827 }
3828
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003829 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003830 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003831 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832 /* we have different sizes, probably the peer
3833 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003834 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003836 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3837 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3838 if (device->state.pdsk >= D_INCONSISTENT &&
3839 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003840 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003841 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003842 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003843 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003844 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003845 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003846 }
3847 }
3848
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003849 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003850}
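/*
 * Sketch (not in the driver) of the size pick in the diskless branch
 * above.  "x ?: y" is the GCC short-circuit form of "x ? x : y", so
 * p_csize ?: p_usize ?: p_size prefers the peer's current size, then
 * the user-requested size, then the peer's backing disk size.
 */
static sector_t __maybe_unused diskless_size_pick_sketch(sector_t p_csize,
							 sector_t p_usize,
							 sector_t p_size)
{
	if (p_csize)		/* peer's current size, if it sent one */
		return p_csize;
	if (p_usize)		/* else the user-capped size */
		return p_usize;
	return p_size;		/* else the peer's backing disk size */
}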
3851
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003852static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003853{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003854 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003855 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003856 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003857 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003858 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003859
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003860 peer_device = conn_peer_device(connection, pi->vnr);
3861 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003862 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003863 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003864
Philipp Reisnerb411b362009-09-25 16:07:19 -07003865 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003866 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003867 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003868 return false;
3869 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003870
3871 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3872 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3873
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003874 kfree(device->p_uuid);
3875 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003876
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003877 if (device->state.conn < C_CONNECTED &&
3878 device->state.disk < D_INCONSISTENT &&
3879 device->state.role == R_PRIMARY &&
3880 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003881 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003882 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003883 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003884 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003885 }
3886
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003887 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003888 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003889 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003890 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003891 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003892 (p_uuid[UI_FLAGS] & 8);
3893 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003894 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003895 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003896 "clear_n_write from receive_uuids",
3897 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003898 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3899 _drbd_uuid_set(device, UI_BITMAP, 0);
3900 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003901 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003902 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003903 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003904 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003905 put_ldev(device);
3906 } else if (device->state.disk < D_INCONSISTENT &&
3907 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003908		/* I am a diskless primary; the peer just created a new current UUID
 3909		   for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003910 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911 }
3912
 3913	/* Before we test the disk state, we should wait until a possibly
 3914	   ongoing cluster-wide state change has finished. That is important if
 3915	   we are primary and are detaching from our disk. We need to see the
 3916	   new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003917 mutex_lock(device->state_mutex);
3918 mutex_unlock(device->state_mutex);
3919 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3920 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003921
3922 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003923 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003924
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003925 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003926}
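
/*
 * Editor's sketch, not part of the original file.  The UUID check above
 * masks off bit 0 on both sides before comparing: the low bit is
 * evidently used as a flag (it is stripped with ~((u64)1)), so two UUIDs
 * that differ only in bit 0 still name the same data generation.  A
 * stand-alone illustration with hypothetical values:
 */
#include <stdint.h>
#include <stdio.h>

static int same_data_generation(uint64_t ed_uuid, uint64_t peer_current)
{
	/* drop the flag bit on both sides, then compare */
	return (ed_uuid & ~(uint64_t)1) == (peer_current & ~(uint64_t)1);
}

int main(void)
{
	uint64_t mine = 0x1234567890abcdeeULL;	/* arbitrary demo value */

	printf("%d\n", same_data_generation(mine, mine | 1));	/* 1: only bit 0 differs */
	printf("%d\n", same_data_generation(mine, mine + 4));	/* 0: different generation */
	return 0;
}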
3927
3928/**
3929 * convert_state() - Converts the peer's view of the cluster state to our point of view
3930 * @ps: The state as seen by the peer.
3931 */
3932static union drbd_state convert_state(union drbd_state ps)
3933{
3934 union drbd_state ms;
3935
3936 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003937 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938 [C_CONNECTED] = C_CONNECTED,
3939
3940 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3941 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3942 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3943 [C_VERIFY_S] = C_VERIFY_T,
3944 [C_MASK] = C_MASK,
3945 };
3946
3947 ms.i = ps.i;
3948
3949 ms.conn = c_tab[ps.conn];
3950 ms.peer = ps.role;
3951 ms.role = ps.peer;
3952 ms.pdsk = ps.disk;
3953 ms.disk = ps.pdsk;
3954 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3955
3956 return ms;
3957}
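
/*
 * Editor's sketch, not part of the original file.  convert_state()
 * mirrors a state between the two nodes' points of view: my "role" is
 * the peer's "peer", my "disk" is the peer's "pdsk", and
 * direction-dependent connection states (e.g. C_STARTING_SYNC_S/_T,
 * C_VERIFY_S/_T) are swapped via c_tab.  A simplified stand-alone model
 * of the field swap:
 */
#include <stdio.h>

struct view { int role, peer, disk, pdsk; };	/* toy stand-in for union drbd_state */

static struct view mirror(struct view ps)
{
	struct view ms = ps;

	ms.role = ps.peer;	/* what the peer calls "peer" is my role */
	ms.peer = ps.role;
	ms.disk = ps.pdsk;	/* the peer's view of my disk is my disk */
	ms.pdsk = ps.disk;
	return ms;
}

int main(void)
{
	/* hypothetical encoding: 1 = primary/up-to-date, 0 = secondary/inconsistent */
	struct view peer_sees = { .role = 1, .peer = 0, .disk = 1, .pdsk = 0 };
	struct view i_see = mirror(peer_sees);

	printf("role=%d peer=%d disk=%d pdsk=%d\n",
	       i_see.role, i_see.peer, i_see.disk, i_see.pdsk);	/* 0 1 0 1 */
	return 0;
}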
3958
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003959static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003960{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003961 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003962 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003963 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003964 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003965 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003966
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003967 peer_device = conn_peer_device(connection, pi->vnr);
3968 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003969 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003970 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003971
Philipp Reisnerb411b362009-09-25 16:07:19 -07003972 mask.i = be32_to_cpu(p->mask);
3973 val.i = be32_to_cpu(p->val);
3974
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003975 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003976 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003977 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003978 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979 }
3980
3981 mask = convert_state(mask);
3982 val = convert_state(val);
3983
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003984 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003985 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003986
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003987 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003988
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003989 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003990}
3991
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003992static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003994 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003995 union drbd_state mask, val;
3996 enum drbd_state_rv rv;
3997
3998 mask.i = be32_to_cpu(p->mask);
3999 val.i = be32_to_cpu(p->val);
4000
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004001 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
4002 mutex_is_locked(&connection->cstate_mutex)) {
4003 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004004 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004005 }
4006
4007 mask = convert_state(mask);
4008 val = convert_state(val);
4009
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004010 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
4011 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004012
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004013 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01004014}
4015
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004016static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004018 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004019 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004020 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004021 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004022 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02004023 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 int rv;
4025
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004026 peer_device = conn_peer_device(connection, pi->vnr);
4027 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004028 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004029 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004030
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031 peer_state.i = be32_to_cpu(p->state);
4032
4033 real_peer_disk = peer_state.disk;
4034 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004035 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004036 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004037 }
4038
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004039 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004040 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004041 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004042 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043
Lars Ellenberg545752d2011-12-05 14:39:25 +01004044 /* If some other part of the code (asender thread, timeout)
4045 * already decided to close the connection again,
4046 * we must not "re-establish" it here. */
4047 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004048 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01004049
Lars Ellenberg40424e42011-09-26 15:24:56 +02004050	/* If this is the "end of sync" confirmation, the peer disk usually
 4051	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For an empty resync
 4052	 * (0 bits set) started in PausedSyncT, or if the timing of pause-/
 4053	 * unpause-sync events was "just right", the peer disk may transition
 4054	 * from D_CONSISTENT to D_UP_TO_DATE as well.
 4055	 */
4056 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
4057 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004058 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
4059 /* If we are (becoming) SyncSource, but peer is still in sync
4060 * preparation, ignore its uptodate-ness to avoid flapping, it
4061 * will change to inconsistent once the peer reaches active
4062 * syncing states.
4063 * It may have changed syncer-paused flags, however, so we
4064 * cannot ignore this completely. */
4065 if (peer_state.conn > C_CONNECTED &&
4066 peer_state.conn < C_SYNC_SOURCE)
4067 real_peer_disk = D_INCONSISTENT;
4068
4069 /* if peer_state changes to connected at the same time,
4070 * it explicitly notifies us that it finished resync.
4071 * Maybe we should finish it up, too? */
4072 else if (os.conn >= C_SYNC_SOURCE &&
4073 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004074 if (drbd_bm_total_weight(device) <= device->rs_failed)
4075 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004076 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004077 }
4078 }
4079
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004080 /* explicit verify finished notification, stop sector reached. */
4081 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
4082 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004083 ov_out_of_sync_print(device);
4084 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02004085 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02004086 }
4087
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02004088	/* The peer says its disk is inconsistent while we think it is up to date,
 4089	 * and this happens while the peer still thinks we have a sync going on,
 4090	 * but we think we are already done with the sync.
 4091	 * We ignore this to avoid flapping pdsk.
 4092	 * This should not happen if the peer is a recent version of DRBD. */
4093 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4094 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4095 real_peer_disk = D_UP_TO_DATE;
4096
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004097 if (ns.conn == C_WF_REPORT_PARAMS)
4098 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004099
Philipp Reisner67531712010-10-27 12:21:30 +02004100 if (peer_state.conn == C_AHEAD)
4101 ns.conn = C_BEHIND;
4102
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004103 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4104 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004105 int cr; /* consider resync */
4106
4107 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004108 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004109 /* if we had an established connection
4110 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004111 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004113 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004114 /* if we have both been inconsistent, and the peer has been
4115 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004116 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004117 /* if we had been plain connected, and the admin requested to
4118 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004119 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120 (peer_state.conn >= C_STARTING_SYNC_S &&
4121 peer_state.conn <= C_WF_BITMAP_T));
4122
4123 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004124 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004125
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004126 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004127 if (ns.conn == C_MASK) {
4128 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004129 if (device->state.disk == D_NEGOTIATING) {
4130 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004131 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004132 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004133 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004134 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004135 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004136 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004137 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004138 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004139 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004140 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004141 }
4142 }
4143 }
4144
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004145 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004146 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004147 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004148 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004149 ns.peer = peer_state.role;
4150 ns.pdsk = real_peer_disk;
4151 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004152 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004153 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004154 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004155 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4156 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004157 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004158		   for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004159 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004160		drbd_err(device, "Aborting Connect, cannot thaw IO with a merely Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004161 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004162 drbd_uuid_new_current(device);
4163 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004164 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004165 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004166 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004167 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4168 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004169 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004170
4171 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004172 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004173 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004174 }
4175
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004176 if (os.conn > C_WF_REPORT_PARAMS) {
4177 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004178 peer_state.disk != D_NEGOTIATING ) {
4179 /* we want resync, peer has not yet decided to sync... */
4180 /* Nowadays only used when forcing a node into primary role and
4181 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004182 drbd_send_uuids(peer_device);
4183 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004184 }
4185 }
4186
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004187 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004189 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004190
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004191 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004192}
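
/*
 * Editor's sketch, not part of the original file.  receive_state() above
 * uses an optimistic retry loop: read the state under the lock, do the
 * negotiation unlocked, then re-check under the lock that nothing changed
 * ("if (os.i != drbd_read_state(device).i) goto retry") before
 * committing.  A loose stand-alone analogy of that shape, using a C11
 * compare-and-swap instead of a spinlock:
 */
#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int shared_state = 42;

static unsigned int negotiate_new_state(unsigned int os)
{
	return os + 1;	/* placeholder for the unlocked decision making */
}

int main(void)
{
	unsigned int os, ns;

	do {
		os = atomic_load(&shared_state);	/* like drbd_read_state() */
		ns = negotiate_new_state(os);		/* unlocked work */
		/* commit only if the state is still what we based ns on */
	} while (!atomic_compare_exchange_weak(&shared_state, &os, ns));

	printf("state %u -> %u\n", os, ns);
	return 0;
}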
4193
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004194static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004195{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004196 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004197 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004198 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004199
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004200 peer_device = conn_peer_device(connection, pi->vnr);
4201 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004202 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004203 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004204
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004205 wait_event(device->misc_wait,
4206 device->state.conn == C_WF_SYNC_UUID ||
4207 device->state.conn == C_BEHIND ||
4208 device->state.conn < C_CONNECTED ||
4209 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004211 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212
Philipp Reisnerb411b362009-09-25 16:07:19 -07004213 /* Here the _drbd_uuid_ functions are right, current should
4214 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004215 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4216 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4217 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004218
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004219 drbd_print_uuids(device, "updated sync uuid");
4220 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004222 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004223 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004224 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004225
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004226 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004227}
4228
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004229/**
4230 * receive_bitmap_plain
4231 *
4232 * Return 0 when done, 1 when another iteration is needed, and a negative error
4233 * code upon failure.
4234 */
4235static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004236receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004237 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004238{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004239 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004240 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004241 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004242 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004243 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004244 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004245
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004246 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004247 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004248 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249 }
4250 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004251 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004252 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004253 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004254 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004255
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004256 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257
4258 c->word_offset += num_words;
4259 c->bit_offset = c->word_offset * BITS_PER_LONG;
4260 if (c->bit_offset > c->bm_bits)
4261 c->bit_offset = c->bm_bits;
4262
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004263 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004264}
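
/*
 * Editor's sketch, not part of the original file.  The chunking
 * arithmetic of receive_bitmap_plain() with hypothetical numbers:
 * assuming a 4096-byte socket buffer, a 16-byte header and 8-byte words,
 * each packet carries at most (4096 - 16) / 8 = 510 words, and the final
 * packet carries whatever remains of the bitmap.
 */
#include <stdio.h>

int main(void)
{
	unsigned long buffer = 4096, header = 16, word = 8;	/* assumed sizes */
	unsigned long bm_words = 1200, word_offset = 1020;	/* hypothetical bitmap */

	unsigned long per_packet = (buffer - header) / word;
	unsigned long remaining = bm_words - word_offset;
	unsigned long num_words = remaining < per_packet ? remaining : per_packet;

	printf("%lu words (%lu bytes) in this packet\n",
	       num_words, num_words * word);	/* 180 words (1440 bytes) */
	return 0;
}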
4265
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004266static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4267{
4268 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4269}
4270
4271static int dcbp_get_start(struct p_compressed_bm *p)
4272{
4273 return (p->encoding & 0x80) != 0;
4274}
4275
4276static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4277{
4278 return (p->encoding >> 4) & 0x7;
4279}
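
/*
 * Editor's sketch, not part of the original file.  The three accessors
 * above imply this layout of the "encoding" byte:
 *
 *	bit 7     bits 6..4    bits 3..0
 *	[start]   [pad bits]   [code]
 *
 * A hypothetical encoder that packs the same fields, round-tripped
 * through the same masks the decoders use:
 */
#include <assert.h>
#include <stdint.h>

static uint8_t dcbp_pack(int code, int pad_bits, int start)
{
	assert(code >= 0 && code <= 0x0f);
	assert(pad_bits >= 0 && pad_bits <= 7);
	return (uint8_t)((code & 0x0f) | ((pad_bits & 0x7) << 4) | (start ? 0x80 : 0));
}

int main(void)
{
	uint8_t enc = dcbp_pack(2 /* arbitrary 4-bit code for the demo */, 5, 1);

	assert((enc & 0x0f) == 2);		/* dcbp_get_code()     */
	assert(((enc >> 4) & 0x7) == 5);	/* dcbp_get_pad_bits() */
	assert((enc & 0x80) != 0);		/* dcbp_get_start()    */
	return 0;
}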
4280
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004281/**
4282 * recv_bm_rle_bits
4283 *
4284 * Return 0 when done, 1 when another iteration is needed, and a negative error
4285 * code upon failure.
4286 */
4287static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004288recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004289 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004290 struct bm_xfer_ctx *c,
4291 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292{
4293 struct bitstream bs;
4294 u64 look_ahead;
4295 u64 rl;
4296 u64 tmp;
4297 unsigned long s = c->bit_offset;
4298 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004299 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004300 int have;
4301 int bits;
4302
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004303 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004304
4305 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4306 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004307 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004308
4309 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4310 bits = vli_decode_bits(&rl, look_ahead);
4311 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004312 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313
4314 if (toggle) {
4315 e = s + rl -1;
4316 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004317 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004318 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004320 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004321 }
4322
4323 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004324 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325 have, bits, look_ahead,
4326 (unsigned int)(bs.cur.b - p->code),
4327 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004328 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004330 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4331 if (likely(bits < 64))
4332 look_ahead >>= bits;
4333 else
4334 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004335 have -= bits;
4336
4337 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4338 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004339 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340 look_ahead |= tmp << have;
4341 have += bits;
4342 }
4343
4344 c->bit_offset = s;
4345 bm_xfer_ctx_bit_to_word_offset(c);
4346
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004347 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348}
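
/*
 * Editor's sketch, not part of the original file.  A simplified model of
 * the toggle-run decoding above: runs strictly alternate between "clear"
 * and "set", only the starting polarity is transmitted, and only "set"
 * runs touch the bitmap.  This version takes the run lengths as a plain
 * array instead of the VLI-encoded bitstream that recv_bm_rle_bits()
 * consumes.
 */
#include <stdint.h>
#include <stdio.h>

static int decode_runs(const uint64_t *rl, int n, int start_set,
		       unsigned char *bitmap, unsigned long bm_bits)
{
	unsigned long s = 0;
	int i, toggle = start_set;

	for (i = 0; i < n; i++, toggle = !toggle) {
		unsigned long e = s + rl[i] - 1;	/* last bit of this run */

		if (e >= bm_bits)
			return -1;	/* run overflows the bitmap, like -EIO above */
		if (toggle) {
			unsigned long b;

			for (b = s; b <= e; b++)
				bitmap[b / 8] |= 1u << (b % 8);
		}
		s += rl[i];
	}
	return 0;
}

int main(void)
{
	unsigned char bm[4] = { 0 };
	/* hypothetical runs: 3 clear, 5 set, 8 clear, 2 set */
	uint64_t rl[] = { 3, 5, 8, 2 };

	if (decode_runs(rl, 4, 0, bm, 32) == 0)
		printf("%02x %02x %02x %02x\n", bm[0], bm[1], bm[2], bm[3]);	/* f8 00 03 00 */
	return 0;
}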
4349
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004350/**
4351 * decode_bitmap_c
4352 *
4353 * Return 0 when done, 1 when another iteration is needed, and a negative error
4354 * code upon failure.
4355 */
4356static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004357decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004359 struct bm_xfer_ctx *c,
4360 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004362 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004363 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364
4365 /* other variants had been implemented for evaluation,
4366 * but have been dropped as this one turned out to be "best"
4367 * during all our tests. */
4368
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004369 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4370 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004371 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372}
4373
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004374void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375 const char *direction, struct bm_xfer_ctx *c)
4376{
4377 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004378 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004379 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4380 unsigned int plain =
4381 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4382 c->bm_words * sizeof(unsigned long);
4383 unsigned int total = c->bytes[0] + c->bytes[1];
4384 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004385
 4386	/* total cannot be zero, but just in case: */
4387 if (total == 0)
4388 return;
4389
4390 /* don't report if not compressed */
4391 if (total >= plain)
4392 return;
4393
4394 /* total < plain. check for overflow, still */
4395 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4396 : (1000 * total / plain);
4397
4398 if (r > 1000)
4399 r = 1000;
4400
4401 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004402 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004403 "total %u; compression: %u.%u%%\n",
4404 direction,
4405 c->bytes[1], c->packets[1],
4406 c->bytes[0], c->packets[0],
4407 total, r/10, r % 10);
4408}
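
/*
 * Editor's sketch, not part of the original file.  The savings
 * computation of INFO_bm_xfer_stats() in isolation (it assumes
 * total < plain, as guaranteed by the early returns above):
 */
#include <limits.h>
#include <stdio.h>

static unsigned int permille_saved(unsigned int total, unsigned int plain)
{
	unsigned int r;

	/* same overflow guard as above: keep 1000 * total from wrapping */
	r = (total > UINT_MAX / 1000) ? (total / (plain / 1000))
				      : (1000 * total / plain);
	if (r > 1000)
		r = 1000;
	return 1000 - r;
}

int main(void)
{
	/* hypothetical transfer: 4096 bytes compressed vs. 131072 plain */
	unsigned int r = permille_saved(4096, 131072);

	printf("compression: %u.%u%%\n", r / 10, r % 10);	/* 96.9% */
	return 0;
}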
4409
 4410/* Since we process the bitfield from lower addresses to higher, it does
 4411   not matter whether we process it in 32-bit or 64-bit chunks, as long
 4412   as it is little endian. (Think of it as a byte stream, beginning with
 4413   the lowest byte...) If we used big endian, we would have to process it
 4414   from the highest address to the lowest in order to be agnostic to the
 4415   32- vs. 64-bit issue.
 4416
 4417   Returns 0 on success, a negative error code on failure. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004418static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004419{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004420 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004421 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004423 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004424
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004425 peer_device = conn_peer_device(connection, pi->vnr);
4426 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004427 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004428 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004430 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004431 /* you are supposed to send additional out-of-sync information
4432 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433
Philipp Reisnerb411b362009-09-25 16:07:19 -07004434 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004435 .bm_bits = drbd_bm_bits(device),
4436 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004437 };
4438
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004439 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004440 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004441 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004442 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004443 /* MAYBE: sanity check that we speak proto >= 90,
4444 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004445 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004446
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004447 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004448 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004449 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004450 goto out;
4451 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004452 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004453 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004454 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004455 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004456 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004457 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004458 if (err)
4459 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004460 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004461 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004462 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004463 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004464 goto out;
4465 }
4466
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004467 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004468 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004469
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004470 if (err <= 0) {
4471 if (err < 0)
4472 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004473 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004474 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004475 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004476 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004477 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004478 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004479
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004480 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004481
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004482 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004483 enum drbd_state_rv rv;
4484
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004485 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004486 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004487 goto out;
4488 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004489 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004490 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004491 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004492 /* admin may have requested C_DISCONNECTING,
4493 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004494 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004495 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004496 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004497 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004498
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004500 drbd_bm_unlock(device);
4501 if (!err && device->state.conn == C_WF_BITMAP_S)
4502 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004503 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004504}
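
/*
 * Editor's sketch, not part of the original file.  Why the little-endian
 * byte-stream view described above makes the chunk size irrelevant: on a
 * little-endian host, one 64-bit word and the two 32-bit words covering
 * the same bits are byte-for-byte identical in memory, so merging in
 * either chunk size produces the same stream.  (This check assumes a
 * little-endian host, as the comment does.)
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint64_t one64 = 0x0123456789abcdefULL;
	uint32_t two32[2] = { 0x89abcdefu, 0x01234567u };	/* low word first */

	assert(sizeof(one64) == sizeof(two32));
	assert(memcmp(&one64, two32, sizeof(one64)) == 0);
	return 0;
}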
4505
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004506static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004507{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004508 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004509 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004510
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004511 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004512}
4513
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004514static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004515{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004516 /* Make sure we've acked all the TCP data associated
4517 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004518 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004519
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004520 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004521}
4522
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004523static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004524{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004525 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004526 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004527 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004528
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004529 peer_device = conn_peer_device(connection, pi->vnr);
4530 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004531 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004532 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004533
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004534 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004535 case C_WF_SYNC_UUID:
4536 case C_WF_BITMAP_T:
4537 case C_BEHIND:
4538 break;
4539 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004540 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004541 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004542 }
4543
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004544 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004545
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004546 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004547}
4548
Philipp Reisner02918be2010-08-20 14:35:10 +02004549struct data_cmd {
4550 int expect_payload;
4551 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004552 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004553};
4554
Philipp Reisner02918be2010-08-20 14:35:10 +02004555static struct data_cmd drbd_cmd_handler[] = {
4556 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4557 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4558 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4559 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004560 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4561 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4562 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004563 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4564 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004565 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4566 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004567 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4568 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4569 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4570 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4571 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4572 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4573 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4574 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4575 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4576 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004577 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004578 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004579 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004580 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004581};
4582
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004583static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004584{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004585 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004586 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004587 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004588
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004589 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004590 struct data_cmd *cmd;
4591
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004592 drbd_thread_current_set_cpu(&connection->receiver);
4593 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004594 goto err_out;
4595
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004596 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004597 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004598 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004599 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004600 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004601 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004603 shs = cmd->pkt_size;
4604 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004605 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004606 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004607 goto err_out;
4608 }
4609
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004610 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004611 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004612 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004613 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004614 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004615 }
4616
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004617 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004618 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004619 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004620 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004621 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004623 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004624 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004626 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004627 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628}
4629
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004630static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004631{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004632 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004633 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004634 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004635
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004636 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004638
Lars Ellenberg545752d2011-12-05 14:39:25 +01004639 /* We are about to start the cleanup after connection loss.
4640 * Make sure drbd_make_request knows about that.
4641 * Usually we should be in some network failure state already,
4642 * but just in case we are not, we fix it up here.
4643 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004644 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004645
Philipp Reisnerb411b362009-09-25 16:07:19 -07004646	/* The asender does not clean up anything; it must not interfere, either. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004647 drbd_thread_stop(&connection->asender);
4648 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004649
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004650 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004651 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4652 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004653 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004654 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004655 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004656 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004657 rcu_read_lock();
4658 }
4659 rcu_read_unlock();
4660
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004661 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004662 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004663 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004664 atomic_set(&connection->current_epoch->epoch_size, 0);
4665 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004666
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004667 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004668
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004669 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4670 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004671
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004672 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004673 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004674 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004675 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004676
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004677 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004678
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004679 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004680 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004681}
4682
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004683static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004684{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004685 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004686 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687
Philipp Reisner85719572010-07-21 10:20:17 +02004688 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004689 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004690 _drbd_wait_ee_list_empty(device, &device->active_ee);
4691 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4692 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004693 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694
4695 /* We do not have data structures that would allow us to
4696 * get the rs_pending_cnt down to 0 again.
4697 * * On C_SYNC_TARGET we do not have any data structures describing
 4698	 *  the pending RSDataRequests we have sent.
4699 * * On C_SYNC_SOURCE there is no data structure that tracks
4700 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4701 * And no, it is not the sum of the reference counts in the
4702 * resync_LRU. The resync_LRU tracks the whole operation including
4703 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4704 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004705 drbd_rs_cancel_all(device);
4706 device->rs_total = 0;
4707 device->rs_failed = 0;
4708 atomic_set(&device->rs_pending_cnt, 0);
4709 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004710
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004711 del_timer_sync(&device->resync_timer);
4712 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004713
Philipp Reisnerb411b362009-09-25 16:07:19 -07004714 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4715 * w_make_resync_request etc. which may still be on the worker queue
4716 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004717 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004718
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004719 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004720
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004721	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
 4722	   might have queued work again. The flush before drbd_finish_peer_reqs() is
 4723	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004724 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004725
Lars Ellenberg08332d72012-08-17 15:09:13 +02004726 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4727 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004728 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004729
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004730 kfree(device->p_uuid);
4731 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004733 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004734 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004736 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004737
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004738 /* serialize with bitmap writeout triggered by the state change,
4739 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004740 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004741
Philipp Reisnerb411b362009-09-25 16:07:19 -07004742 /* tcp_close and release of sendpage pages can be deferred. I don't
4743 * want to use SO_LINGER, because apparently it can be deferred for
4744 * more than 20 seconds (longest time I checked).
4745 *
 4746	 * Actually we don't care exactly when the network stack does its
4747 * put_page(), but release our reference on these pages right here.
4748 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004749 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004750 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004751 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004752 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004753 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004754 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004755 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004756 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004757 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004758
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004759 D_ASSERT(device, list_empty(&device->read_ee));
4760 D_ASSERT(device, list_empty(&device->active_ee));
4761 D_ASSERT(device, list_empty(&device->sync_ee));
4762 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004763
Philipp Reisner360cc742011-02-08 14:29:53 +01004764 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004765}
4766
4767/*
4768 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4769 * we can agree on is stored in agreed_pro_version.
4770 *
 4771 * The feature flags and the reserved array should provide enough room for
 4772 * future enhancements of the handshake protocol, and for possible plugins...
 4773 *
 4774 * For now, they are expected to be zero, but are ignored.
4775 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004776static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004777{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004778 struct drbd_socket *sock;
4779 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004780
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004781 sock = &connection->data;
4782 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004783 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004784 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004785 memset(p, 0, sizeof(*p));
4786 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4787 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004788 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004789 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004790}
4791
4792/*
4793 * return values:
4794 * 1 yes, we have a valid connection
4795 * 0 oops, did not work out, please try again
4796 * -1 peer talks different language,
4797 * no point in trying again, please go standalone.
4798 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
		  "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
		 "I support %d-%d, peer supports %d-%d\n",
		 PRO_VERSION_MIN, PRO_VERSION_MAX,
		 p->protocol_min, p->protocol_max);
	return -1;
}

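/*
 * Editor's sketch (compiled out, not part of the driver): the version
 * negotiation in drbd_do_features() above reduces to an interval
 * intersection of [PRO_VERSION_MIN, PRO_VERSION_MAX] with
 * [peer_min, peer_max]. The helper name below is hypothetical.
 */
#if 0
static int example_agree_version(int peer_min, int peer_max)
{
	/* a peer that sends protocol_max == 0 only speaks peer_min */
	if (peer_max == 0)
		peer_max = peer_min;

	/* the two supported ranges do not overlap: no common dialect */
	if (PRO_VERSION_MAX < peer_min || PRO_VERSION_MIN > peer_max)
		return -1;

	/* otherwise both sides can speak min(our max, their max) */
	return min_t(int, PRO_VERSION_MAX, peer_max);
}
#endif
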
#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
#else
#define CHALLENGE_LEN 64

/* Return value:
	1 - auth succeeded,
	0 - failed, try again (network error),
	-1 - auth failed, don't try again.
*/

static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer. */

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif

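/*
 * Editor's sketch (compiled out, not part of the driver): the core of
 * the CRAM-HMAC exchange above, with transport and error handling
 * stripped away. Each side proves knowledge of the shared secret by
 * returning HMAC(secret, peer's challenge). The helper name below is
 * hypothetical; tfm must already be keyed via crypto_hash_setkey().
 */
#if 0
static bool example_cram_response_ok(struct crypto_hash *tfm,
				     const char *challenge, unsigned int ch_len,
				     const char *response)
{
	struct hash_desc desc = { .tfm = tfm, .flags = 0 };
	struct scatterlist sg;
	char expected[SHARED_SECRET_MAX];	/* big enough for any digest used here */

	/* recompute what the peer should have sent ... */
	sg_init_table(&sg, 1);
	sg_set_buf(&sg, challenge, ch_len);
	if (crypto_hash_digest(&desc, &sg, ch_len, expected))
		return false;

	/* ... and compare it against what actually arrived */
	return memcmp(response, expected, crypto_hash_digestsize(tfm)) == 0;
}
#endif
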
int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

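/*
 * Editor's note (compiled out, illustrative only): got_IsInSync() above
 * mixes three units. p->blksize is a byte count; rs_sect_in counts
 * 512-byte sectors (hence ">> 9"); rs_same_csum counts bitmap blocks of
 * BM_BLOCK_SIZE bytes (">> BM_BLOCK_SHIFT"). The helper below is
 * hypothetical and assumes the usual 4 KiB bitmap granularity.
 */
#if 0
static void example_in_sync_units(void)
{
	int blksize = 4096;				/* bytes, from the peer */
	int sectors = blksize >> 9;			/* = 8 sectors of 512 bytes */
	int bm_blocks = blksize >> BM_BLOCK_SHIFT;	/* = 1 if BM_BLOCK_SHIFT == 12 */
}
#endif
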
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}

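/*
 * Editor's note (compiled out): connection_finish_peer_reqs() above uses
 * a common kernel pattern for walking an idr under RCU while calling
 * something that may sleep: pin the current element with a kref, drop
 * the RCU read lock around the call, then re-enter the read section.
 * Skeleton of that pattern:
 */
#if 0
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		kref_get(&device->kref);	/* keep device alive without RCU */
		rcu_read_unlock();
		/* ... potentially sleeping work on device ... */
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();		/* idr_for_each_entry() needs RCU again */
	}
	rcu_read_unlock();
#endif
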
struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static struct asender_cmd asender_tbl[] = {
	[P_PING]	      = { 0, got_Ping },
	[P_PING_ACK]	      = { 0, got_PingAck },
	[P_RECV_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	      = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	      = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]     = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	      = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	      = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY]   = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]     = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]	      = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	      = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	      = { sizeof(struct p_block_ack), got_BlockAck },
};

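/*
 * Editor's note (compiled out): dispatch in drbd_asender() below is
 * table driven, so extending the protocol takes one handler with this
 * signature plus one table entry. P_EXAMPLE_ACK and got_ExampleAck are
 * hypothetical names, not part of the protocol.
 */
#if 0
static int got_ExampleAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device = conn_peer_device(connection, pi->vnr);

	if (!peer_device)
		return -EIO;
	/* pi->data points at pkt_size bytes of payload, already received */
	return 0;	/* non-zero makes drbd_asender() reconnect */
}
	/* in asender_tbl: [P_EXAMPLE_ACK] = { sizeof(struct p_block_ack), got_ExampleAck }, */
#endif
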
int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct net_conf *nc;
	int ping_timeo, tcp_cork, ping_int;
	struct sched_param param = { .sched_priority = 2 };

	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS	 (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;

				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				       jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			cmd = &asender_tbl[pi.cmd];
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}
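
/*
 * Editor's sketch (compiled out, not part of the driver): the receive
 * bookkeeping of drbd_asender() above, reduced to its two-phase state
 * machine. The function name is hypothetical, and the signal, timeout
 * and bounds handling of the real loop is deliberately omitted.
 */
#if 0
static int example_meta_loop(struct drbd_connection *connection,
			     unsigned int header_size)
{
	void *buf = connection->meta.rbuf;
	int received = 0;
	int expect = header_size;	/* phase 1: read the fixed-size header */
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;

	for (;;) {
		int rv = drbd_recv_short(connection->meta.socket, buf,
					 expect - received, 0);
		if (rv <= 0)
			return rv;	/* real code distinguishes -EINTR/-EAGAIN/... */
		received += rv;
		buf += rv;

		if (received == expect && !cmd) {
			/* header complete: phase 2, read the fixed-size payload;
			 * the real code also bounds-checks pi.cmd and pi.size */
			if (decode_header(connection, connection->meta.rbuf, &pi))
				return -EIO;
			cmd = &asender_tbl[pi.cmd];
			expect = header_size + cmd->pkt_size;
		}
		if (received == expect) {
			/* full packet: dispatch, then reset for the next header */
			if (cmd->fn(connection, &pi))
				return -EIO;
			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}
}
#endif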