/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"
#include "drbd_vli.h"

#define PRO_FEATURES (FF_TRIM)

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);
static void conn_wait_active_ee_empty(struct drbd_connection *connection);
static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
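
#if 0	/* Illustrative sketch, not built: how the chain helpers above hang
	 * together.  example_build_chain() is a hypothetical name used only
	 * for illustration; page_chain_next() and page_chain_for_each_safe()
	 * come from drbd_int.h. */
static struct page *example_build_chain(unsigned int n)
{
	struct page *chain = NULL;
	struct page *p;
	unsigned int i;

	for (i = 0; i < n; i++) {
		p = alloc_page(GFP_NOIO);
		if (!p) {
			page_chain_free(chain);	/* put_page() on each link */
			return NULL;
		}
		/* link the new page in front; the very first page keeps
		 * page->private == 0, which is the end-of-chain marker */
		set_page_private(p, (unsigned long)chain);
		chain = p;
	}
	return chain;	/* release again with page_chain_free(chain) */
}
#endif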

static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list.  Since
	   they are sent in order over the wire, they have to finish
	   in order.  As soon as we see the first one that is not
	   finished, we can stop examining the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * If this allocation would exceed the max_buffers setting, we throttle
 * allocation (schedule_timeout) to give the system some room to breathe.
 *
 * We do not use max-buffers as hard limit, because it could lead to
 * congestion and further to a distributed deadlock during online-verify or
 * (checksum based) resync, if the max-buffers, socket buffer sizes and
 * resync-rate settings are mis-configured.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
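
#if 0	/* Illustrative sketch, not built: the intended pairing of
	 * drbd_alloc_pages() and drbd_free_pages().  The function name is
	 * hypothetical; "peer_device" is assumed to belong to an established
	 * connection, and page_chain_next() comes from drbd_int.h. */
static void example_page_pool_roundtrip(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	struct page *page, *p;

	/* may block until pp_in_use drops below max_buffers, or until we
	 * are signalled; NULL means the allocation failed */
	page = drbd_alloc_pages(peer_device, 4, true /* retry */);
	if (!page)
		return;

	for (p = page; p; p = page_chain_next(p)) {
		/* ... fill or consume each page in the chain ... */
	}

	/* hand the chain back; accounted against pp_in_use (is_net == 0) */
	drbd_free_pages(device, page, 0);
}
#endif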

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
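
#if 0	/* Illustrative sketch, not built: the locking rules above in
	 * practice.  example_drain_ee() is a hypothetical name;
	 * _drbd_wait_ee_list_empty() and drbd_free_peer_reqs() are defined
	 * further down in this file. */
static void example_drain_ee(struct drbd_device *device)
{
	/* the list-draining helper wants the req_lock held ... */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* ... while the free routines must run without it */
	drbd_free_peer_reqs(device, &device->net_ee);
}
#endif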

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
			  int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
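
#if 0	/* Illustrative sketch, not built: allocating a peer request for an
	 * incoming write and releasing it again.  The function name is
	 * hypothetical; id, sector and data_size stand in for values taken
	 * from the wire.  drbd_free_peer_req() wraps __drbd_free_peer_req()
	 * with is_net == 0. */
static void example_peer_req_lifecycle(struct drbd_peer_device *peer_device,
				       u64 id, sector_t sector, unsigned int data_size)
{
	struct drbd_peer_request *peer_req;

	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size,
				       true /* has_payload */, GFP_NOIO);
	if (!peer_req)
		return;

	/* ... receive the payload into peer_req->pages, submit it ... */

	drbd_free_peer_req(peer_device->device, peer_req);
}
#endif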

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}
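
#if 0	/* Illustrative sketch, not built: the two typical call styles of
	 * drbd_recv_short().  With flags == 0 the helper substitutes
	 * MSG_WAITALL | MSG_NOSIGNAL and blocks for the whole buffer; with
	 * MSG_DONTWAIT | MSG_PEEK it only probes the socket, as
	 * drbd_socket_okay() below does.  The function name is hypothetical. */
static void example_recv_short(struct socket *sock)
{
	char buf[8];
	int rv;

	/* blocking read: returns sizeof(buf), 0 on EOF, or -errno */
	rv = drbd_recv_short(sock, buf, sizeof(buf), 0);

	/* non-destructive liveness probe: data stays queued in the socket */
	rv = drbd_recv_short(sock, buf, sizeof(buf), MSG_DONTWAIT | MSG_PEEK);
	(void)rv;
}
#endif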

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock:	pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}

/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better. */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1096
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001097static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001099 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001101 if (header_size == sizeof(struct p_header100) &&
1102 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1103 struct p_header100 *h = header;
1104 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001105 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001106 return -EINVAL;
1107 }
1108 pi->vnr = be16_to_cpu(h->volume);
1109 pi->cmd = be16_to_cpu(h->command);
1110 pi->size = be32_to_cpu(h->length);
1111 } else if (header_size == sizeof(struct p_header95) &&
1112 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001114 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001115 pi->size = be32_to_cpu(h->length);
1116 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001117 } else if (header_size == sizeof(struct p_header80) &&
1118 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1119 struct p_header80 *h = header;
1120 pi->cmd = be16_to_cpu(h->command);
1121 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001122 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001123 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001124 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001125 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001126 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001127 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001128 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001129 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001130 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131}
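/*
 * Illustrative sketch, not part of the driver: the three on-wire
 * header generations are told apart purely by their leading magic,
 * exactly as decode_header() does above.  The return values here are
 * made up for the example.
 */
#if 0
static int peek_header_generation(const void *header)
{
	if (*(const __be32 *)header == cpu_to_be32(DRBD_MAGIC_100))
		return 100;	/* p_header100: 16-bit volume, 32-bit length, pad */
	if (*(const __be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG))
		return 95;	/* p_header95: no volume, 32-bit length */
	if (*(const __be32 *)header == cpu_to_be32(DRBD_MAGIC))
		return 80;	/* p_header80: no volume, 16-bit length */
	return -EINVAL;
}
#endif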
1132
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001133static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001134{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001135 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001136 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001137
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001138 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001139 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001140 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001141
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001142 err = decode_header(connection, buffer, pi);
1143 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001145 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146}
1147
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001148static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149{
1150 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001151 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001152 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001154 if (connection->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001155 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001156 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1157 struct drbd_device *device = peer_device->device;
1158
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001159 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001160 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001161 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001162 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001163
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001164 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001165 GFP_NOIO, NULL);
1166 if (rv) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001167 drbd_info(device, "local disk flush failed with status %d\n", rv);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001168 /* would rather check on EOPNOTSUPP, but that is not reliable.
1169 * don't try again for ANY return value != 0
1170 * if (rv == -EOPNOTSUPP) */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001171 drbd_bump_write_ordering(connection, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001172 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001173 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001174 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001175
1176 rcu_read_lock();
1177 if (rv)
1178 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001180 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001182}
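/*
 * Note on the locking pattern above: blkdev_issue_flush() sleeps, so it
 * must not be called under rcu_read_lock().  The loop therefore pins
 * the device with kref_get() while still inside the read-side critical
 * section, drops the lock for the blocking flush, and re-acquires it
 * before continuing the idr walk.  The same dance is used in
 * conn_wait_active_ee_empty() below.
 */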
1183
1184/**
1185 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001186 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 * @epoch: Epoch object.
1188 * @ev: Epoch event.
1189 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001190static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 struct drbd_epoch *epoch,
1192 enum epoch_event ev)
1193{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001194 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 enum finish_epoch rv = FE_STILL_LIVE;
1197
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001198 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 do {
1200 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201
1202 epoch_size = atomic_read(&epoch->epoch_size);
1203
1204 switch (ev & ~EV_CLEANUP) {
1205 case EV_PUT:
1206 atomic_dec(&epoch->active);
1207 break;
1208 case EV_GOT_BARRIER_NR:
1209 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 break;
1211 case EV_BECAME_LAST:
1212 /* nothing to do */
1213 break;
1214 }
1215
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 if (epoch_size != 0 &&
1217 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001218 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001220 spin_unlock(&connection->epoch_lock);
1221 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1222 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001223 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001224#if 0
1225 /* FIXME: dec unacked on connection, once we have
1226 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001227 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001228 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001229#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001231 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1233 list_del(&epoch->list);
1234 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001235 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 kfree(epoch);
1237
1238 if (rv == FE_STILL_LIVE)
1239 rv = FE_DESTROYED;
1240 } else {
1241 epoch->flags = 0;
1242 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001243 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 if (rv == FE_STILL_LIVE)
1245 rv = FE_RECYCLED;
1246 }
1247 }
1248
1249 if (!next_epoch)
1250 break;
1251
1252 epoch = next_epoch;
1253 } while (1);
1254
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001255 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257 return rv;
1258}
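/*
 * Epoch lifecycle in short: EV_PUT drops one active writer, and
 * EV_GOT_BARRIER_NR records that the peer has closed the epoch.  An
 * epoch is finished once it has seen at least one write
 * (epoch_size != 0), has no writes in flight (active == 0), and its
 * barrier number is known (or we are cleaning up anyway); only then is
 * P_BARRIER_ACK sent.  A finished non-current epoch is freed
 * (FE_DESTROYED), while the current one is merely reset for reuse
 * (FE_RECYCLED).
 */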
1259
1260/**
1261 * drbd_bump_write_ordering() - Fall back to another write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001262 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001263 * @wo: Write ordering method to try.
1264 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001265void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001267 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001268 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001270 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271 static char *write_ordering_str[] = {
1272 [WO_none] = "none",
1273 [WO_drain_io] = "drain",
1274 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275 };
1276
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001277 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001279 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001280 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1281 struct drbd_device *device = peer_device->device;
1282
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001283 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001284 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001285 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001286
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001287 if (wo == WO_bdev_flush && !dc->disk_flushes)
1288 wo = WO_drain_io;
1289 if (wo == WO_drain_io && !dc->disk_drain)
1290 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001291 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001292 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001293 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001294 connection->write_ordering = wo;
1295 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001296 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001297}
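/*
 * The ordering methods form a strict hierarchy, WO_none < WO_drain_io
 * < WO_bdev_flush, and wo = min(pwo, wo) means a connection is only
 * ever downgraded: once a backing-device flush fails (see
 * drbd_flush()), we fall back to draining and never silently return to
 * flushing.  Likewise, a single attached device with disk_flushes or
 * disk_drain disabled caps the method for the whole connection.
 */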
1298
1299/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001300 * drbd_submit_peer_request() - submit the pages of a peer request as one or more bios
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001301 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001302 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001303 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001304 *
1305 * May spread the pages to multiple bios,
1306 * depending on bio_add_page restrictions.
1307 *
1308 * Returns 0 if all bios have been submitted,
1309 * -ENOMEM if we could not allocate enough bios,
1310 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1311 * single page to an empty bio (which should never happen and likely indicates
1312 * that the lower level IO stack is in some way broken). This has been observed
1313 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001314 */
1315/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001316int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001317 struct drbd_peer_request *peer_req,
1318 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001319{
1320 struct bio *bios = NULL;
1321 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001322 struct page *page = peer_req->pages;
1323 sector_t sector = peer_req->i.sector;
1324 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001325 unsigned n_bios = 0;
1326 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001327 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001328
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001329 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1330 /* wait for all pending IO completions, before we start
1331 * zeroing things out. */
1332 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1333 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1334 sector, ds >> 9, GFP_NOIO))
1335 peer_req->flags |= EE_WAS_ERROR;
1336 drbd_endio_write_sec_final(peer_req);
1337 return 0;
1338 }
1339
1340 if (peer_req->flags & EE_IS_TRIM)
1341 nr_pages = 0; /* discards don't have any payload. */
1342
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001343 /* In most cases, we will only need one bio. But in case the lower
1344 * level restrictions happen to be different at this offset on this
1345 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001346 * request in more than one bio.
1347 *
1348 * Plain bio_alloc is good enough here; this is not a DRBD-internally
1349 * generated bio, but a bio allocated on behalf of the peer.
1350 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001351next_bio:
1352 bio = bio_alloc(GFP_NOIO, nr_pages);
1353 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001354 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001355 goto fail;
1356 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001357 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001358 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001359 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001360 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001361 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001362 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001363
1364 bio->bi_next = bios;
1365 bios = bio;
1366 ++n_bios;
1367
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001368 if (rw & REQ_DISCARD) {
1369 bio->bi_iter.bi_size = ds;
1370 goto submit;
1371 }
1372
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001373 page_chain_for_each(page) {
1374 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1375 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001376 /* A single page must always be possible!
1377 * But in case it fails anyways,
1378 * we deal with it, and complain (below). */
1379 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001380 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001381 "bio_add_page failed for len=%u, "
1382 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001383 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001384 err = -ENOSPC;
1385 goto fail;
1386 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001387 goto next_bio;
1388 }
1389 ds -= len;
1390 sector += len >> 9;
1391 --nr_pages;
1392 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001393 D_ASSERT(device, ds == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001394submit:
1395 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001396
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001397 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001398 do {
1399 bio = bios;
1400 bios = bios->bi_next;
1401 bio->bi_next = NULL;
1402
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001403 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001404 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001405 return 0;
1406
1407fail:
1408 while (bios) {
1409 bio = bios;
1410 bios = bios->bi_next;
1411 bio_put(bio);
1412 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001413 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001414}
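/*
 * Worked example of the splitting above (illustrative numbers): for a
 * 32 KiB peer request on a lower device that accepts only four pages
 * per bio, bio_add_page() refuses the fifth page, we jump back to
 * next_bio, allocate a second bio starting at the already-advanced
 * sector, and chain it via bi_next.  pending_bios is set to
 * n_bios == 2 before submission, so drbd_peer_request_endio() can tell
 * when the last fragment has completed.
 */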
1415
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001416static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001417 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001418{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001419 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001420
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001421 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001422 drbd_clear_interval(i);
1423
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001424 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001425 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001426 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001427}
1428
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001429static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001430{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001431 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001432 int vnr;
1433
1434 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001435 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1436 struct drbd_device *device = peer_device->device;
1437
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001438 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001439 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001440 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001441 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001442 rcu_read_lock();
1443 }
1444 rcu_read_unlock();
1445}
1446
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001447static struct drbd_peer_device *
1448conn_peer_device(struct drbd_connection *connection, int volume_number)
1449{
1450 return idr_find(&connection->peer_devices, volume_number);
1451}
1452
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001453static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001454{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001455 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001456 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457 struct drbd_epoch *epoch;
1458
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001459 /* FIXME these are unacked on connection,
1460 * not a specific (peer)device.
1461 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001462 connection->current_epoch->barrier_nr = p->barrier;
1463 connection->current_epoch->connection = connection;
1464 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001465
1466 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1467 * the activity log, which means it would not be resynced in case the
1468 * R_PRIMARY crashes now.
1469 * Therefore we must send the barrier_ack after the barrier request was
1470 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001471 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472 case WO_none:
1473 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001474 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001475
1476 /* receiver context, in the writeout path of the other node.
1477 * avoid potential distributed deadlock */
1478 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1479 if (epoch)
1480 break;
1481 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001482 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001483 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001484
1485 case WO_bdev_flush:
1486 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001487 conn_wait_active_ee_empty(connection);
1488 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001489
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001490 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001491 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1492 if (epoch)
1493 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494 }
1495
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001496 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001497 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001498 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001499 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001500 }
1501
1502 epoch->flags = 0;
1503 atomic_set(&epoch->epoch_size, 0);
1504 atomic_set(&epoch->active, 0);
1505
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001506 spin_lock(&connection->epoch_lock);
1507 if (atomic_read(&connection->current_epoch->epoch_size)) {
1508 list_add(&epoch->list, &connection->current_epoch->list);
1509 connection->current_epoch = epoch;
1510 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001511 } else {
1512 /* The current_epoch got recycled while we allocated this one... */
1513 kfree(epoch);
1514 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001515 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001516
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001517 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001518}
1519
1520/* used from receive_RSDataReply (recv_resync_read)
1521 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001522static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001523read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001524 struct packet_info *pi) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001525{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001526 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001527 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001528 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001529 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001530 int dgs, ds, err;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001531 int data_size = pi->size;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001532 void *dig_in = peer_device->connection->int_dig_in;
1533 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001534 unsigned long *data;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001535 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001536
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001537 dgs = 0;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001538 if (!trim && peer_device->connection->peer_integrity_tfm) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001539 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001540 /*
1541 * FIXME: Receive the incoming digest into the receive buffer
1542 * here, together with its struct p_data?
1543 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001544 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001545 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001547 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548 }
1549
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001550 if (trim) {
1551 D_ASSERT(peer_device, data_size == 0);
1552 data_size = be32_to_cpu(trim->size);
1553 }
1554
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001555 if (!expect(IS_ALIGNED(data_size, 512)))
1556 return NULL;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001557 /* prepare for larger trim requests. */
1558 if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001559 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001560
Lars Ellenberg66660322010-04-06 12:15:04 +02001561 /* even though we trust our peer,
1562 * we sometimes have to double check. */
1563 if (sector + (data_size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001564 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001565 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001566 (unsigned long long)capacity,
1567 (unsigned long long)sector, data_size);
1568 return NULL;
1569 }
1570
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1572 * "criss-cross" setup, that might cause write-out on some other DRBD,
1573 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001574 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001575 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001576 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001577
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001578 if (trim)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001579 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001580
Philipp Reisnerb411b362009-09-25 16:07:19 -07001581 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001582 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001583 page_chain_for_each(page) {
1584 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001585 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001586 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001587 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001588 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001589 data[0] = data[0] ^ (unsigned long)-1;
1590 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001592 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001593 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594 return NULL;
1595 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001596 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597 }
1598
1599 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001600 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001601 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001602 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001603 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001604 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605 return NULL;
1606 }
1607 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001608 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001609 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001610}
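/*
 * Wire layout handled above, when a peer data-integrity algorithm is
 * configured: each data packet's payload is preceded by a digest of
 * crypto_hash_digestsize() bytes.  With md5, for instance, a 4096-byte
 * block arrives as 16 + 4096 bytes, and data_size is reduced by dgs
 * before the payload loop.  Trim requests carry neither payload nor
 * digest.
 */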
1611
1612/* drbd_drain_block() just takes a data block
1613 * out of the socket input buffer, and discards it.
1614 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001615static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001616{
1617 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001618 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619 void *data;
1620
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001621 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001622 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001623
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001624 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001625
1626 data = kmap(page);
1627 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001628 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1629
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001630 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001631 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001632 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001633 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001634 }
1635 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001636 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001637 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001638}
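/*
 * This keeps the TCP byte stream in step when the payload cannot be
 * used, e.g. because we have no local disk (see receive_RSDataReply()):
 * the bytes must still be read off the socket, so they are received
 * into a single throwaway page, one PAGE_SIZE chunk at a time, and the
 * page is then returned to the pool.
 */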
1639
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001640static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001641 sector_t sector, int data_size)
1642{
Kent Overstreet79886132013-11-23 17:19:00 -08001643 struct bio_vec bvec;
1644 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001646 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001647 void *dig_in = peer_device->connection->int_dig_in;
1648 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001650 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001651 if (peer_device->connection->peer_integrity_tfm) {
1652 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1653 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001654 if (err)
1655 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001656 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657 }
1658
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659 /* optimistically update recv_cnt. if receiving fails below,
1660 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001661 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662
1663 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001664 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665
Kent Overstreet79886132013-11-23 17:19:00 -08001666 bio_for_each_segment(bvec, bio, iter) {
1667 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1668 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001669 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001670 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001671 if (err)
1672 return err;
1673 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001674 }
1675
1676 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001677 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001678 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001679 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001680 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001681 }
1682 }
1683
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001684 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001685 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686}
1687
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001688/*
1689 * e_end_resync_block() is called in asender context via
1690 * drbd_finish_peer_reqs().
1691 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001692static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001693{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001694 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001695 container_of(w, struct drbd_peer_request, w);
1696 struct drbd_peer_device *peer_device = peer_req->peer_device;
1697 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001698 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001699 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001700
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001701 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001702
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001703 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001704 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001705 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706 } else {
1707 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001708 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001710 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001711 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001712 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001713
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001714 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001715}
1716
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001717static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001718 struct packet_info *pi) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001720 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001721 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001723 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001724 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001725 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001726
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001727 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001728
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001729 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001730 /* corresponding dec_unacked() in e_end_resync_block()
1731 * respective _drbd_clear_done_ee */
1732
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001733 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001734
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001735 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001736 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001737 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001738
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001739 atomic_add(pi->size >> 9, &device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001740 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001741 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001742
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001743 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001744 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001745 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001746 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001747 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001748
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001749 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001750fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001751 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001752 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001753}
1754
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001755static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001756find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001757 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001758{
1759 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001760
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001761 /* Request object according to our peer */
1762 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001763 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001764 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001765 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001766 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001767 (unsigned long)id, (unsigned long long)sector);
1768 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001769 return NULL;
1770}
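/*
 * The block_id echoed back by the peer is the kernel address of our
 * own request object, cast to a 64-bit id when the request was sent.
 * The cast above is only trusted after drbd_contains_interval() has
 * confirmed that a request with exactly this address, covering this
 * sector, is still in the tree -- otherwise a confused or malicious
 * peer could make us dereference an arbitrary pointer.
 */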
1771
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001772static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001773{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001774 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001775 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776 struct drbd_request *req;
1777 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001778 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001779 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001780
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001781 peer_device = conn_peer_device(connection, pi->vnr);
1782 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001783 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001784 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785
1786 sector = be64_to_cpu(p->sector);
1787
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001788 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001789 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001790 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001791 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001792 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001793
Bart Van Assche24c48302011-05-21 18:32:29 +02001794 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795 * special casing it there for the various failure cases.
1796 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001797 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001798 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001799 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001800 /* else: nothing. handled from drbd_disconnect...
1801 * I don't think we may complete this just yet
1802 * in case we are "on-disconnect: freeze" */
1803
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001804 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805}
1806
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001807static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001809 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001810 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001812 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001813 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001814
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001815 peer_device = conn_peer_device(connection, pi->vnr);
1816 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001817 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001818 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001819
1820 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001821 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001822
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001823 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001824 /* data is submitted to disk within recv_resync_read.
1825 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001826 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001827 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001828 } else {
1829 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001830 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001831
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001832 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001833
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001834 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001835 }
1836
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001837 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001838
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001839 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001840}
1841
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001842static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001843 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001844{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001845 struct drbd_interval *i;
1846 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001847
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001848 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001849 if (!i->local)
1850 continue;
1851 req = container_of(i, struct drbd_request, i);
1852 if (req->rq_state & RQ_LOCAL_PENDING ||
1853 !(req->rq_state & RQ_POSTPONED))
1854 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001855 /* as it is RQ_POSTPONED, this will cause it to
1856 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001857 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001858 }
1859}
1860
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001861/*
1862 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001863 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001864static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001866 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001867 container_of(w, struct drbd_peer_request, w);
1868 struct drbd_peer_device *peer_device = peer_req->peer_device;
1869 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001870 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001871 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001872
Philipp Reisner303d1442011-04-13 16:24:47 -07001873 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001874 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001875 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1876 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001877 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001878 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001879 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001881 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001882 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001883 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001884 /* we expect it to be marked out of sync anyways...
1885 * maybe assert this? */
1886 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001887 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001888 }
1889 /* we delete from the conflict detection hash _after_ we sent out the
1890 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001891 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001892 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001893 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001894 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001895 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001896 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001897 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001898 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001899 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001900
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001901 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001903 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001904}
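/*
 * Ack selection above, in short: a successful write that may also count
 * as resync progress (EE_MAY_SET_IN_SYNC while we are between
 * SyncSource and PausedSyncT) is answered with P_RS_WRITE_ACK and
 * marked in sync locally; any other success gets P_WRITE_ACK, and a
 * failed write gets P_NEG_ACK so the peer marks the block out of sync
 * for a later resync.
 */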
1905
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001906static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001907{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001908 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001909 container_of(w, struct drbd_peer_request, w);
1910 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001911 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001912
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001913 err = drbd_send_ack(peer_device, ack, peer_req);
1914 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001916 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917}
1918
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001919static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001920{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001921 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001922}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001923
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001924static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001925{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001926 struct drbd_peer_request *peer_req =
1927 container_of(w, struct drbd_peer_request, w);
1928 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001929
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001930 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001931 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001932}
1933
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001934static bool seq_greater(u32 a, u32 b)
1935{
1936 /*
1937 * We assume 32-bit wrap-around here.
1938 * For 24-bit wrap-around, we would have to shift:
1939 * a <<= 8; b <<= 8;
1940 */
1941 return (s32)a - (s32)b > 0;
1942}
1943
1944static u32 seq_max(u32 a, u32 b)
1945{
1946 return seq_greater(a, b) ? a : b;
1947}
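/*
 * A minimal self-test sketch of the wrap-around compare, assuming
 * nothing beyond the two helpers above (illustrative, not wired into
 * the driver):
 */
#if 0
static void seq_compare_example(void)
{
	/* 5 is "newer" than 0xfffffffe: the signed difference is +7,
	 * even though 5 < 0xfffffffe as plain unsigned numbers. */
	BUG_ON(!seq_greater(5, 0xfffffffe));
	BUG_ON(seq_greater(0xfffffffe, 5));
	/* seq_max() accordingly picks the newer value across the wrap. */
	BUG_ON(seq_max(5, 0xfffffffe) != 5);
}
#endif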
1948
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001949static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001950{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001951 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001952 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001953
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001954 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001955 spin_lock(&device->peer_seq_lock);
1956 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1957 device->peer_seq = newest_peer_seq;
1958 spin_unlock(&device->peer_seq_lock);
1959 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001960 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001961 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001962 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001963}
1964
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001965static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1966{
1967 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1968}
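/*
 * Example: l1/l2 are byte lengths, s1/s2 sector numbers, hence the >>9.
 * A 4 KiB write at sector 8 covers sectors [8, 16), so
 * overlaps(8, 4096, 12, 4096) is true (the second write starts inside
 * that range), while overlaps(8, 4096, 16, 4096) is false -- the two
 * intervals merely touch.
 */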
1969
1970/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001971static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001972{
1973 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001974 bool rv = false;
1975
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001976 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001977 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001978 if (overlaps(peer_req->i.sector, peer_req->i.size,
1979 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001980 rv = true;
1981 break;
1982 }
1983 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001984 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001985
1986 return rv;
1987}
1988
Philipp Reisnerb411b362009-09-25 16:07:19 -07001989/* Called from receive_Data.
1990 * Synchronize packets on sock with packets on msock.
1991 *
1992 * This is here so that even when a P_DATA packet traveling via sock has
1993 * overtaken an Ack packet traveling on msock, they are still processed in
1994 * the order they have been sent.
1995 *
1996 * Note: we don't care about Ack packets overtaking P_DATA packets.
1997 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001998 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001999 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002000 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001 * ourselves. Correctly handles 32bit wrap around.
2002 *
2003 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2004 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2005 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2006 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
2007 *
2008 * returns 0 if we may process the packet,
2009 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002010static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002012 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002013 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002014 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002015 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002016
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002017 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002018 return 0;
2019
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002020 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002021 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002022 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2023 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002025 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002026
Philipp Reisnerb411b362009-09-25 16:07:19 -07002027 if (signal_pending(current)) {
2028 ret = -ERESTARTSYS;
2029 break;
2030 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002031
2032 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002033 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002034 rcu_read_unlock();
2035
2036 if (!tp)
2037 break;
2038
2039 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002040 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2041 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002042 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002043 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002044 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002045 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002046 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002047 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002048 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002049 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002050 break;
2051 }
2052 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002053 spin_unlock(&device->peer_seq_lock);
2054 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002055 return ret;
2056}
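/*
 * Example of the wait condition above: with device->peer_seq == 9, an
 * incoming peer_seq of 10 is the logically next packet
 * (seq_greater(10 - 1, 9) is false) and proceeds at once, while
 * peer_seq == 12 waits until the packets carrying sequence numbers 10 and 11
 * have arrived on the msock and advanced device->peer_seq.
 */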
2057
Lars Ellenberg688593c2010-11-17 22:25:03 +01002058/* see also bio_flags_to_wire()
2059 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2060 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002061static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002062{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002063 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2064 (dpf & DP_FUA ? REQ_FUA : 0) |
2065 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2066 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002067}
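/*
 * For instance, a data packet sent with dp_flags == (DP_RW_SYNC | DP_FUA)
 * maps back to REQ_SYNC | REQ_FUA on this side, and dp_flags == 0 yields
 * plain zero bio flags.
 */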
2068
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002069static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002070 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002071{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002072 struct drbd_interval *i;
2073
2074 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002075 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002076 struct drbd_request *req;
2077 struct bio_and_error m;
2078
2079 if (!i->local)
2080 continue;
2081 req = container_of(i, struct drbd_request, i);
2082 if (!(req->rq_state & RQ_POSTPONED))
2083 continue;
2084 req->rq_state &= ~RQ_POSTPONED;
2085 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002086 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002087 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002088 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002089 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002090 goto repeat;
2091 }
2092}
2093
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002094static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002095 struct drbd_peer_request *peer_req)
2096{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002097 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002098 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002099 sector_t sector = peer_req->i.sector;
2100 const unsigned int size = peer_req->i.size;
2101 struct drbd_interval *i;
2102 bool equal;
2103 int err;
2104
2105 /*
2106 * Inserting the peer request into the write_requests tree will prevent
2107 * new conflicting local requests from being added.
2108 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002109 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002110
2111 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002112 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002113 if (i == &peer_req->i)
2114 continue;
2115
2116 if (!i->local) {
2117 /*
2118 * Our peer has sent a conflicting remote request; this
2119 * should not happen in a two-node setup. Wait for the
2120 * earlier peer request to complete.
2121 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002122 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002123 if (err)
2124 goto out;
2125 goto repeat;
2126 }
2127
2128 equal = i->sector == sector && i->size == size;
2129 if (resolve_conflicts) {
2130 /*
2131 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002132 * overlapping request, it can be considered overwritten
2133 * and thus superseded; otherwise, it will be retried
2134 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002135 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002136 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002137 (i->size >> 9) >= sector + (size >> 9);
2138
2139 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002140 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002141 "local=%llus +%u, remote=%llus +%u, "
2142 "assuming %s came first\n",
2143 (unsigned long long)i->sector, i->size,
2144 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002145 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002146
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002147 inc_unacked(device);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002148 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002149 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002150 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002151 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002152
2153 err = -ENOENT;
2154 goto out;
2155 } else {
2156 struct drbd_request *req =
2157 container_of(i, struct drbd_request, i);
2158
2159 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002160 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002161 "local=%llus +%u, remote=%llus +%u\n",
2162 (unsigned long long)i->sector, i->size,
2163 (unsigned long long)sector, size);
2164
2165 if (req->rq_state & RQ_LOCAL_PENDING ||
2166 !(req->rq_state & RQ_POSTPONED)) {
2167 /*
2168 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002169 * decide if this request has been superseded
2170 * or needs to be retried.
2171 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002172 * disappear from the write_requests tree.
2173 *
2174 * In addition, wait for the conflicting
2175 * request to finish locally before submitting
2176 * the conflicting peer request.
2177 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002178 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002179 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002180 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002181 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002182 goto out;
2183 }
2184 goto repeat;
2185 }
2186 /*
2187 * Remember to restart the conflicting requests after
2188 * the new peer request has completed.
2189 */
2190 peer_req->flags |= EE_RESTART_REQUESTS;
2191 }
2192 }
2193 err = 0;
2194
2195 out:
2196 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002197 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002198 return err;
2199}
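/*
 * Containment example for the "superseded" test above (sizes in bytes,
 * i->size >> 9 in sectors): a local request at sector 0 with size 8192
 * covers sectors [0, 16).  A peer request at sector 4, size 2048 covers
 * [4, 8), is fully contained, and is superseded; a peer request at
 * sector 12, size 4096 covers [12, 20), only partially overlaps, and is
 * retried once the overlapping requests have completed.
 */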
2200
Philipp Reisnerb411b362009-09-25 16:07:19 -07002201/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002202static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002203{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002204 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002205 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002207 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002208 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002209 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002210 int rw = WRITE;
2211 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002212 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002213
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002214 peer_device = conn_peer_device(connection, pi->vnr);
2215 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002216 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002217 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002218
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002219 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002220 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002221
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002222 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2223 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002224 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002225 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002226 if (!err)
2227 err = err2;
2228 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002229 }
2230
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002231 /*
2232 * Corresponding put_ldev done either below (on various errors), or in
2233 * drbd_peer_request_endio, if we successfully submit the data at the
2234 * end of this function.
2235 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002236
2237 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002238 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002239 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002240 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002241 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002242 }
2243
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002244 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002245
Lars Ellenberg688593c2010-11-17 22:25:03 +01002246 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002247 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002248 if (pi->cmd == P_TRIM) {
2249 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2250 peer_req->flags |= EE_IS_TRIM;
2251 if (!blk_queue_discard(q))
2252 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2253 D_ASSERT(peer_device, peer_req->i.size > 0);
2254 D_ASSERT(peer_device, rw & REQ_DISCARD);
2255 D_ASSERT(peer_device, peer_req->pages == NULL);
2256 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002257 D_ASSERT(device, peer_req->i.size == 0);
2258 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002259 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002260
2261 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002262 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002263
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002264 spin_lock(&connection->epoch_lock);
2265 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002266 atomic_inc(&peer_req->epoch->epoch_size);
2267 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002268 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002269
Philipp Reisner302bdea2011-04-21 11:36:49 +02002270 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002271 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002272 rcu_read_unlock();
2273 if (tp) {
2274 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002275 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002276 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002277 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002278 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002279 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002280 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002281 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002282 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002283 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002284 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002285 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002286 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002287 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002288 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002289 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002290 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002291 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002292 /* if we use the zeroout fallback code, we process synchronously
2293 * and wait for all pending requests, i.e. for active_ee to become
2294 * empty, in drbd_submit_peer_request();
2295 * better not add ourselves here. */
2296 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2297 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002298 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002299
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002300 if (device->state.conn == C_SYNC_TARGET)
2301 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002302
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002303 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002304 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002305 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002306 case DRBD_PROT_C:
2307 dp_flags |= DP_SEND_WRITE_ACK;
2308 break;
2309 case DRBD_PROT_B:
2310 dp_flags |= DP_SEND_RECEIVE_ACK;
2311 break;
2312 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002313 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002314 }
2315
2316 if (dp_flags & DP_SEND_WRITE_ACK) {
2317 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002318 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002319 /* corresponding dec_unacked() in e_end_block()
2320 * or in _drbd_clear_done_ee, respectively */
Philipp Reisner303d1442011-04-13 16:24:47 -07002321 }
2322
2323 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002324 /* I really don't like it that the receiver thread
2325 * sends on the msock, but anyway */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002326 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002327 }
2328
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002329 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002330 /* In case we have the only disk of the cluster: */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002331 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002332 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2333 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002334 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002335 }
2336
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002337 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002338 if (!err)
2339 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002340
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002341 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002342 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002343 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002344 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002345 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002346 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002347 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002348 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002349
Philipp Reisnerb411b362009-09-25 16:07:19 -07002350out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002351 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002352 put_ldev(device);
2353 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002354 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002355}
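/*
 * Ack mapping sketch for pre-protocol-100 peers, as derived above from
 * net_conf->wire_protocol: protocol C writes set DP_SEND_WRITE_ACK and are
 * acknowledged (P_WRITE_ACK) only once e_end_block() has seen the data
 * written; protocol B writes set DP_SEND_RECEIVE_ACK and get a P_RECV_ACK
 * on receipt; protocol A sets neither flag and sends no ack at all.
 */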
2356
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002357/* We may throttle resync, if the lower device seems to be busy,
2358 * and current sync rate is above c_min_rate.
2359 *
2360 * To decide whether or not the lower device is busy, we use a scheme similar
2361 * to MD RAID's is_mddev_idle(): if the partition stats reveal a "significant"
2362 * amount (more than 64 sectors) of activity that we cannot account for with
2363 * our own resync activity, the device obviously is "busy".
2364 *
2365 * The current sync rate used here is based only on the most recent two step
2366 * marks, to get a short-time average so we can react faster.
2367 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002368bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2369{
2370 struct lc_element *tmp;
2371 bool throttle = true;
2372
2373 if (!drbd_rs_c_min_rate_throttle(device))
2374 return false;
2375
2376 spin_lock_irq(&device->al_lock);
2377 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2378 if (tmp) {
2379 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2380 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2381 throttle = false;
2382 /* Do not slow down if app IO is already waiting for this extent */
2383 }
2384 spin_unlock_irq(&device->al_lock);
2385
2386 return throttle;
2387}
2388
2389bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002390{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002391 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002392 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002393 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002394 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002395
2396 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002397 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002398 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002399
2400 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002401 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002402 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002403
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002404 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2405 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002406 atomic_read(&device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002407 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002408 unsigned long rs_left;
2409 int i;
2410
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002411 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002412
2413 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2414 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002415 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002416
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002417 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2418 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002419 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002420 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002421
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002422 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002423 if (!dt)
2424 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002425 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002426 dbdt = Bit2KB(db/dt);
2427
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002428 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002429 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002430 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002431 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002432}
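/*
 * Worked example of the rate check above, assuming the usual 4 KiB bitmap
 * granularity behind Bit2KB(): if db = 3000 bitmap bits were resynced over
 * the dt = 3 seconds since the sync mark used above, then
 * dbdt = Bit2KB(3000 / 3) = 4000 KiB/s.  With c_min_rate configured at,
 * say, 250 KiB/s, resync is running fast enough (4000 > 250) to be
 * throttled in favor of application I/O.
 */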
2433
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002434static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002435{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002436 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002437 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002438 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002439 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002440 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002441 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002442 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002443 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002444 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002445
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002446 peer_device = conn_peer_device(connection, pi->vnr);
2447 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002448 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002449 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002450 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002451
2452 sector = be64_to_cpu(p->sector);
2453 size = be32_to_cpu(p->blksize);
2454
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002455 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002456 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002457 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002458 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002459 }
2460 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002461 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002462 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002463 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002464 }
2465
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002466 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002467 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002468 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002469 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002470 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002471 break;
2472 case P_RS_DATA_REQUEST:
2473 case P_CSUM_RS_REQUEST:
2474 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002475 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002476 break;
2477 case P_OV_REPLY:
2478 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002479 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002480 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002481 break;
2482 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002483 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002484 }
2485 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002486 drbd_err(device, "Can not satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002487 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002488
Lars Ellenberga821cc42010-09-06 12:31:37 +02002489 /* drain the payload, if any */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002490 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002491 }
2492
2493 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2494 * "criss-cross" setup, that might cause write-out on some other DRBD,
2495 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002496 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2497 true /* has real payload */, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002498 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002499 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002500 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002501 }
2502
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002503 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002504 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002505 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002506 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002507 /* application IO, don't drbd_rs_begin_io */
2508 goto submit;
2509
Philipp Reisnerb411b362009-09-25 16:07:19 -07002510 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002511 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002512 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002513 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002514 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002515 break;
2516
2517 case P_OV_REPLY:
2518 case P_CSUM_RS_REQUEST:
2519 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002520 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002521 if (!di)
2522 goto out_free_e;
2523
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002524 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002525 di->digest = (((char *)di)+sizeof(struct digest_info));
2526
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002527 peer_req->digest = di;
2528 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002529
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002530 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002531 goto out_free_e;
2532
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002533 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002534 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002535 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002536 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002537 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002538 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002539 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002540 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002541 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002542 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002543 /* drbd_rs_begin_io done when we sent this request,
2544 * but accounting still needs to be done. */
2545 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002546 }
2547 break;
2548
2549 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002550 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002551 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002552 unsigned long now = jiffies;
2553 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002554 device->ov_start_sector = sector;
2555 device->ov_position = sector;
2556 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2557 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002558 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002559 device->rs_mark_left[i] = device->ov_left;
2560 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002561 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002562 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002563 (unsigned long long)sector);
2564 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002565 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002566 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002567 break;
2568
Philipp Reisnerb411b362009-09-25 16:07:19 -07002569 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002570 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002571 }
2572
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002573 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2574 * wrt the receiver, but it is not as straightforward as it may seem.
2575 * Various places in the resync start and stop logic assume resync
2576 * requests are processed in order; requeuing this on the worker thread
2577 * would introduce a bunch of new code for synchronization between threads.
2578 *
2579 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2580 * "forever", throttling after drbd_rs_begin_io will lock that extent
2581 * for application writes for the same time. For now, just throttle
2582 * here, where the rest of the code expects the receiver to sleep for
2583 * a while, anyway.
2584 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002585
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002586 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2587 * this defers syncer requests for some time, before letting at least
2588 * one request through. The resync controller on the receiving side
2589 * will adapt to the incoming rate accordingly.
2590 *
2591 * We cannot throttle here if remote is Primary/SyncTarget:
2592 * we would also throttle its application reads.
2593 * In that case, throttling is done on the SyncTarget only.
2594 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002595 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002596 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002597 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002598 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002599
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002600submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002601 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002602
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002603submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002604 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002605 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002606 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002607 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002608
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002609 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002610 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002611
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002612 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002613 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002614 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002615 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002616 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002617 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2618
Philipp Reisnerb411b362009-09-25 16:07:19 -07002619out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002620 put_ldev(device);
2621 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002622 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002623}
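/*
 * Size validation example for the checks at the top of receive_DataRequest():
 * a request for sector 8 with blksize 4096 passes (positive, 512-byte
 * aligned, within DRBD_MAX_BIO_SIZE and device capacity), while blksize 1000
 * fails IS_ALIGNED(size, 512) and is rejected with -EINVAL before any peer
 * request is allocated.
 */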
2624
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002625/**
2626 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2627 */
2628static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002629{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002630 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002631 int self, peer, rv = -100;
2632 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002633 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002634
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002635 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2636 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002637
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002638 ch_peer = device->p_uuid[UI_SIZE];
2639 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002640
Philipp Reisner44ed1672011-04-19 17:10:19 +02002641 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002642 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002643 rcu_read_unlock();
2644 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002645 case ASB_CONSENSUS:
2646 case ASB_DISCARD_SECONDARY:
2647 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002648 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002649 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002650 break;
2651 case ASB_DISCONNECT:
2652 break;
2653 case ASB_DISCARD_YOUNGER_PRI:
2654 if (self == 0 && peer == 1) {
2655 rv = -1;
2656 break;
2657 }
2658 if (self == 1 && peer == 0) {
2659 rv = 1;
2660 break;
2661 }
2662 /* Else fall through to one of the other strategies... */
2663 case ASB_DISCARD_OLDER_PRI:
2664 if (self == 0 && peer == 1) {
2665 rv = 1;
2666 break;
2667 }
2668 if (self == 1 && peer == 0) {
2669 rv = -1;
2670 break;
2671 }
2672 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002673 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002674 "Using discard-least-changes instead\n");
2675 case ASB_DISCARD_ZERO_CHG:
2676 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002677 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678 ? -1 : 1;
2679 break;
2680 } else {
2681 if (ch_peer == 0) { rv = 1; break; }
2682 if (ch_self == 0) { rv = -1; break; }
2683 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002684 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002685 break;
2686 case ASB_DISCARD_LEAST_CHG:
2687 if (ch_self < ch_peer)
2688 rv = -1;
2689 else if (ch_self > ch_peer)
2690 rv = 1;
2691 else /* ( ch_self == ch_peer ) */
2692 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002693 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002694 ? -1 : 1;
2695 break;
2696 case ASB_DISCARD_LOCAL:
2697 rv = -1;
2698 break;
2699 case ASB_DISCARD_REMOTE:
2700 rv = 1;
2701 }
2702
2703 return rv;
2704}
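/*
 * Decision example for ASB_DISCARD_LEAST_CHG above: with ch_self = 100
 * blocks changed locally and ch_peer = 4000 changed on the peer,
 * ch_self < ch_peer yields rv = -1, i.e. this node becomes sync target and
 * discards its (fewer) changes.  On an exact tie, the RESOLVE_CONFLICTS
 * flag breaks the decision deterministically between the two nodes.
 */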
2705
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002706/**
2707 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2708 */
2709static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002710{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002711 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002712 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002713 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002714
Philipp Reisner44ed1672011-04-19 17:10:19 +02002715 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002716 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002717 rcu_read_unlock();
2718 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002719 case ASB_DISCARD_YOUNGER_PRI:
2720 case ASB_DISCARD_OLDER_PRI:
2721 case ASB_DISCARD_LEAST_CHG:
2722 case ASB_DISCARD_LOCAL:
2723 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002724 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002725 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002726 break;
2727 case ASB_DISCONNECT:
2728 break;
2729 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002730 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002731 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002732 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002733 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002734 rv = hg;
2735 break;
2736 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002737 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002738 break;
2739 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002740 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002742 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002743 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002744 enum drbd_state_rv rv2;
2745
Philipp Reisnerb411b362009-09-25 16:07:19 -07002746 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2747 * we might be here in C_WF_REPORT_PARAMS which is transient.
2748 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002749 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002750 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002751 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002752 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002753 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002754 rv = hg;
2755 }
2756 } else
2757 rv = hg;
2758 }
2759
2760 return rv;
2761}
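/*
 * ASB_CONSENSUS example: if drbd_asb_recover_0p() suggests hg == -1
 * (discard local) while this node is Secondary, the suggestion is accepted;
 * if this node is Primary, rv stays -100 and the peers disconnect rather
 * than discard a primary's data.
 */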
2762
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002763/**
2764 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2765 */
2766static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002767{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002768 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002769 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002770 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002771
Philipp Reisner44ed1672011-04-19 17:10:19 +02002772 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002773 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002774 rcu_read_unlock();
2775 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002776 case ASB_DISCARD_YOUNGER_PRI:
2777 case ASB_DISCARD_OLDER_PRI:
2778 case ASB_DISCARD_LEAST_CHG:
2779 case ASB_DISCARD_LOCAL:
2780 case ASB_DISCARD_REMOTE:
2781 case ASB_CONSENSUS:
2782 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002783 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002784 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002785 break;
2786 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002787 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002788 break;
2789 case ASB_DISCONNECT:
2790 break;
2791 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002792 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002793 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002794 enum drbd_state_rv rv2;
2795
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2797 * we might be here in C_WF_REPORT_PARAMS which is transient.
2798 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002799 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002800 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002801 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002802 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002803 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002804 rv = hg;
2805 }
2806 } else
2807 rv = hg;
2808 }
2809
2810 return rv;
2811}
2812
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002813static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002814 u64 bits, u64 flags)
2815{
2816 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002817 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002818 return;
2819 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002820 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002821 text,
2822 (unsigned long long)uuid[UI_CURRENT],
2823 (unsigned long long)uuid[UI_BITMAP],
2824 (unsigned long long)uuid[UI_HISTORY_START],
2825 (unsigned long long)uuid[UI_HISTORY_END],
2826 (unsigned long long)bits,
2827 (unsigned long long)flags);
2828}
2829
2830/*
2831 100 after split brain try auto recover
2832 2 C_SYNC_SOURCE set BitMap
2833 1 C_SYNC_SOURCE use BitMap
2834 0 no Sync
2835 -1 C_SYNC_TARGET use BitMap
2836 -2 C_SYNC_TARGET set BitMap
2837 -100 after split brain, disconnect
2838-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002839-1091 requires proto 91
2840-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002841 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002842static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843{
2844 u64 self, peer;
2845 int i, j;
2846
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002847 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2848 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002849
2850 *rule_nr = 10;
2851 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2852 return 0;
2853
2854 *rule_nr = 20;
2855 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2856 peer != UUID_JUST_CREATED)
2857 return -2;
2858
2859 *rule_nr = 30;
2860 if (self != UUID_JUST_CREATED &&
2861 (peer == UUID_JUST_CREATED || peer == (u64)0))
2862 return 2;
2863
2864 if (self == peer) {
2865 int rct, dc; /* roles at crash time */
2866
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002867 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002869 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002870 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002872 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2873 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002874 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002875 drbd_uuid_move_history(device);
2876 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2877 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002878
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002879 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2880 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002881 *rule_nr = 34;
2882 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002883 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 *rule_nr = 36;
2885 }
2886
2887 return 1;
2888 }
2889
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002890 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002891
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002892 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002893 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002895 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2896 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002897 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002898
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002899 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2900 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2901 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002902
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002903 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002904 *rule_nr = 35;
2905 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002906 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002907 *rule_nr = 37;
2908 }
2909
2910 return -1;
2911 }
2912
2913 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002914 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2915 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002916 /* lowest bit is set when we were primary,
2917 * next bit (weight 2) is set when peer was primary */
2918 *rule_nr = 40;
2919
2920 switch (rct) {
2921 case 0: /* !self_pri && !peer_pri */ return 0;
2922 case 1: /* self_pri && !peer_pri */ return 1;
2923 case 2: /* !self_pri && peer_pri */ return -1;
2924 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002925 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002926 return dc ? -1 : 1;
2927 }
2928 }
2929
2930 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002931 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932 if (self == peer)
2933 return -1;
2934
2935 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002936 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002937 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002938 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002939 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2940 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2941 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002942 /* The last P_SYNC_UUID did not get through. Undo the modifications the
2943 peer made to its UUIDs when it last started a resync as sync source. */
2944
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002945 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002946 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002947
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002948 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2949 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002950
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002951 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002952 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002953
Philipp Reisnerb411b362009-09-25 16:07:19 -07002954 return -1;
2955 }
2956 }
2957
2958 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002959 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002960 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002961 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002962 if (self == peer)
2963 return -2;
2964 }
2965
2966 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002967 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2968 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002969 if (self == peer)
2970 return 1;
2971
2972 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002973 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002974 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002975 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002976 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2977 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2978 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002979 /* The last P_SYNC_UUID did not get though. Undo the last start of
2980 resync as sync source modifications of our UUIDs. */
2981
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002982 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002983 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002984
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002985 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2986 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002987
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002988 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002989 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2990 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002991
2992 return 1;
2993 }
2994 }
2995
2996
2997 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002998 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002999 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003000 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003001 if (self == peer)
3002 return 2;
3003 }
3004
3005 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003006 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3007 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003008 if (self == peer && self != ((u64)0))
3009 return 100;
3010
3011 *rule_nr = 100;
3012 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003013 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003014 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003015 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003016 if (self == peer)
3017 return -100;
3018 }
3019 }
3020
3021 return -1000;
3022}
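
/*
 * Rough summary of the drbd_uuid_compare() return values, as read from
 * the rules above (illustrative, not an exhaustive spec):
 *
 *      0        current UUIDs match        -> no resync needed
 *      1 /  2   we have the newer data     -> become SyncSource (2: full sync)
 *     -1 / -2   peer has the newer data    -> become SyncTarget (-2: full sync)
 *    100 / -100 split brain                -> try automatic/manual recovery
 *  -1091        peer too old (needs at least protocol 91) to repair history
 *  -1000        the data sets are unrelated -> refuse to connect
 *
 * *rule_nr reports which rule fired, purely for logging.
 */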

/* drbd_sync_handshake() returns the new connection state on success, or
   C_MASK on failure.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
			  hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
				  "automatically solved. Sync from %s node\n",
				  pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
					  " UUIDs were ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
				  "Sync from %s node\n",
				  (hg < 0) ? "peer" : "this");
	}
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data "
				  "assumption\n");
		}
	}

	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
				  drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				  abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}
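
/*
 * The value returned above is a target connection state, not a command:
 * C_WF_BITMAP_S / C_WF_BITMAP_T mean "exchange bitmaps, then resync as
 * source / target", C_CONNECTED means "no resync needed", and C_MASK
 * tells the caller to drop the connection attempt.  (Informal summary
 * of the code above.)
 */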

static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
{
	/* ASB_DISCARD_REMOTE on one side paired with ASB_DISCARD_LOCAL
	 * on the other is a valid combination */
	if (peer == ASB_DISCARD_REMOTE)
		return ASB_DISCARD_LOCAL;

	/* any other combination involving ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL is invalid */
	if (peer == ASB_DISCARD_LOCAL)
		return ASB_DISCARD_REMOTE;

	/* everything else is valid if they are equal on both sides. */
	return peer;
}
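
/*
 * Example: if the peer was configured with after-sb-0pri=discard-remote,
 * then from our point of view that means discard-local (the peer
 * discards *us*), which is exactly the value the compatibility checks
 * in receive_protocol() below compare against.
 */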

static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_protocol *p = pi->data;
	enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
	int p_proto, p_discard_my_data, p_two_primaries, cf;
	struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
	char integrity_alg[SHARED_SECRET_MAX] = "";
	struct crypto_hash *peer_integrity_tfm = NULL;
	void *int_dig_in = NULL, *int_dig_vv = NULL;

	p_proto = be32_to_cpu(p->protocol);
	p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
	p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
	p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
	p_two_primaries = be32_to_cpu(p->two_primaries);
	cf = be32_to_cpu(p->conn_flags);
	p_discard_my_data = cf & CF_DISCARD_MY_DATA;

	if (connection->agreed_pro_version >= 87) {
		int err;

		if (pi->size > sizeof(integrity_alg))
			return -EIO;
		err = drbd_recv_all(connection, integrity_alg, pi->size);
		if (err)
			return err;
		integrity_alg[SHARED_SECRET_MAX - 1] = 0;
	}

	if (pi->cmd != P_PROTOCOL_UPDATE) {
		clear_bit(CONN_DRY_RUN, &connection->flags);

		if (cf & CF_DRY_RUN)
			set_bit(CONN_DRY_RUN, &connection->flags);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);

		if (p_proto != nc->wire_protocol) {
			drbd_err(connection, "incompatible %s settings\n", "protocol");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
			goto disconnect_rcu_unlock;
		}

		if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
			drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
			goto disconnect_rcu_unlock;
		}

		if (p_discard_my_data && nc->discard_my_data) {
			drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
			goto disconnect_rcu_unlock;
		}

		if (p_two_primaries != nc->two_primaries) {
			drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
			goto disconnect_rcu_unlock;
		}

		if (strcmp(integrity_alg, nc->integrity_alg)) {
			drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
			goto disconnect_rcu_unlock;
		}

		rcu_read_unlock();
	}

	if (integrity_alg[0]) {
		int hash_size;

		/*
		 * We can only change the peer data integrity algorithm
		 * here.  Changing our own data integrity algorithm
		 * requires that we send a P_PROTOCOL_UPDATE packet at
		 * the same time; otherwise, the peer has no way to
		 * tell at which packet boundary the algorithm should
		 * change.
		 */

		peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
		if (!peer_integrity_tfm) {
			drbd_err(connection, "peer data-integrity-alg %s not supported\n",
				 integrity_alg);
			goto disconnect;
		}

		hash_size = crypto_hash_digestsize(peer_integrity_tfm);
		int_dig_in = kmalloc(hash_size, GFP_KERNEL);
		int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
		if (!(int_dig_in && int_dig_vv)) {
			drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
			goto disconnect;
		}
	}

	new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
	if (!new_net_conf) {
		drbd_err(connection, "Allocation of new net_conf failed\n");
		goto disconnect;
	}

	mutex_lock(&connection->data.mutex);
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = connection->net_conf;
	*new_net_conf = *old_net_conf;

	new_net_conf->wire_protocol = p_proto;
	new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
	new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
	new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
	new_net_conf->two_primaries = p_two_primaries;

	rcu_assign_pointer(connection->net_conf, new_net_conf);
	mutex_unlock(&connection->resource->conf_update);
	mutex_unlock(&connection->data.mutex);

	crypto_free_hash(connection->peer_integrity_tfm);
	kfree(connection->int_dig_in);
	kfree(connection->int_dig_vv);
	connection->peer_integrity_tfm = peer_integrity_tfm;
	connection->int_dig_in = int_dig_in;
	connection->int_dig_vv = int_dig_vv;

	if (strcmp(old_net_conf->integrity_alg, integrity_alg))
		drbd_info(connection, "peer data-integrity-alg: %s\n",
			  integrity_alg[0] ? integrity_alg : "(none)");

	synchronize_rcu();
	kfree(old_net_conf);
	return 0;

disconnect_rcu_unlock:
	rcu_read_unlock();
disconnect:
	crypto_free_hash(peer_integrity_tfm);
	kfree(int_dig_in);
	kfree(int_dig_vv);
	conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
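
/*
 * The net_conf update above is the usual RCU publish/reclaim dance:
 * rcu_assign_pointer() publishes new_net_conf to readers, then
 * synchronize_rcu() waits until every reader that might still hold
 * old_net_conf (under rcu_read_lock()) has finished, and only then is
 * kfree(old_net_conf) safe.  A minimal sketch of the same idiom, with
 * illustrative names:
 *
 *	new = kmalloc(sizeof(*new), GFP_KERNEL);
 *	*new = *old;			// copy, then modify the copy
 *	new->field = updated_value;
 *	rcu_assign_pointer(shared, new);	// publish
 *	synchronize_rcu();			// wait out old readers
 *	kfree(old);				// now safe to reclaim
 */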

/* helper function
 * input: alg name, feature name
 * return: NULL (alg name was "")
 *         ERR_PTR(error) if something goes wrong
 *         or the crypto hash ptr, if it worked out ok. */
static
struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
		const char *alg, const char *name)
{
	struct crypto_hash *tfm;

	if (!alg[0])
		return NULL;

	tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm)) {
		drbd_err(device, "Cannot allocate \"%s\" as %s (reason: %ld)\n",
			 alg, name, PTR_ERR(tfm));
		return tfm;
	}
	return tfm;
}

static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
{
	void *buffer = connection->data.rbuf;
	int size = pi->size;

	while (size) {
		int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
		s = drbd_recv(connection, buffer, s);
		if (s <= 0) {
			if (s < 0)
				return s;
			break;
		}
		size -= s;
	}
	if (size)
		return -EIO;
	return 0;
}
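
/*
 * ignore_remaining_packet() drains pi->size payload bytes in
 * DRBD_SOCKET_BUFFER_SIZE chunks so the receive stream stays aligned
 * on the next packet header even when we cannot (or do not want to)
 * interpret the payload.
 */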

/*
 * config_unknown_volume - device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet.  It will warn and ignore these
 * commands.  Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	return ignore_remaining_packet(connection, pi);
}

static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
		   : apv == 88 ? sizeof(struct p_rs_param)
				 + SHARED_SECRET_MAX
		   : apv <= 94 ? sizeof(struct p_rs_param_89)
		   : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
			 pi->size, exp_max_sz);
		return -EIO;
	}

	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					 "peer wants %u, accepting only up to %u bytes\n",
					 data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
					 old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv > 94 && new_disk_conf) {
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed\n");
					put_ldev(device);
					goto disconnect;
				}
			}
		}

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_hash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_hash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}

/* warn if the arguments differ by more than 12.5% */
static void warn_if_differ_considerably(struct drbd_device *device,
	const char *s, sector_t a, sector_t b)
{
	sector_t d;
	if (a == 0 || b == 0)
		return;
	d = (a > b) ? (a - b) : (b - a);
	if (d > (a>>3) || d > (b>>3))
		drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
			  (unsigned long long)a, (unsigned long long)b);
}
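
/*
 * "More than 12.5%" because d > (a>>3) compares the difference against
 * one eighth.  Worked example (hypothetical numbers): a = 1000, b = 880
 * gives d = 120; a>>3 = 125, so that half of the test fails, but
 * b>>3 = 110 < 120, so the warning is printed.
 */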

static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_sizes *p = pi->data;
	enum determine_dev_size dd = DS_UNCHANGED;
	sector_t p_size, p_usize, my_usize;
	int ldsc = 0; /* local disk size changed */
	enum dds_flags ddsf;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_size = be64_to_cpu(p->d_size);
	p_usize = be64_to_cpu(p->u_size);

	/* just store the peer's disk size for now.
	 * we still need to figure out whether we accept that. */
	device->p_size = p_size;

	if (get_ldev(device)) {
		rcu_read_lock();
		my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
		rcu_read_unlock();

		warn_if_differ_considerably(device, "lower level device sizes",
			   p_size, drbd_get_max_capacity(device->ldev));
		warn_if_differ_considerably(device, "user requested size",
					    p_usize, my_usize);

		/* if this is the first connect, or an otherwise expected
		 * param exchange, choose the minimum */
		if (device->state.conn == C_WF_REPORT_PARAMS)
			p_usize = min_not_zero(my_usize, p_usize);

		/* Never shrink a device with usable data during connect.
		   But allow online shrinking if we are connected. */
		if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
		    drbd_get_capacity(device->this_bdev) &&
		    device->state.disk >= D_OUTDATED &&
		    device->state.conn < C_CONNECTED) {
			drbd_err(device, "The peer's disk size is too small!\n");
			conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
			put_ldev(device);
			return -EIO;
		}

		if (my_usize != p_usize) {
			struct disk_conf *old_disk_conf, *new_disk_conf = NULL;

			new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
			if (!new_disk_conf) {
				drbd_err(device, "Allocation of new disk_conf failed\n");
				put_ldev(device);
				return -ENOMEM;
			}

			mutex_lock(&connection->resource->conf_update);
			old_disk_conf = device->ldev->disk_conf;
			*new_disk_conf = *old_disk_conf;
			new_disk_conf->disk_size = p_usize;

			rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
			mutex_unlock(&connection->resource->conf_update);
			synchronize_rcu();
			kfree(old_disk_conf);

			drbd_info(device, "Peer sets u_size to %lu sectors\n",
				  (unsigned long)p_usize);
		}

		put_ldev(device);
	}

	device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
	drbd_reconsider_max_bio_size(device);
	/* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
	   In case we cleared the QUEUE_FLAG_DISCARD from our queue in
	   drbd_reconsider_max_bio_size(), we can be sure that after
	   drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */

	ddsf = be16_to_cpu(p->dds_flags);
	if (get_ldev(device)) {
		dd = drbd_determine_dev_size(device, ddsf, NULL);
		put_ldev(device);
		if (dd == DS_ERROR)
			return -EIO;
		drbd_md_sync(device);
	} else {
		/* I am diskless, need to accept the peer's size. */
		drbd_set_my_capacity(device, p_size);
	}

	if (get_ldev(device)) {
		if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
			device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
			ldsc = 1;
		}

		put_ldev(device);
	}

	if (device->state.conn > C_WF_REPORT_PARAMS) {
		if (be64_to_cpu(p->c_size) !=
		    drbd_get_capacity(device->this_bdev) || ldsc) {
			/* we have different sizes, probably peer
			 * needs to know my new size... */
			drbd_send_sizes(peer_device, 0, ddsf);
		}
		if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
		    (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
			if (device->state.pdsk >= D_INCONSISTENT &&
			    device->state.disk >= D_INCONSISTENT) {
				if (ddsf & DDSF_NO_RESYNC)
					drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
				else
					resync_after_online_grow(device);
			} else
				set_bit(RESYNC_AFTER_NEG, &device->flags);
		}
	}

	return 0;
}
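
/*
 * Size negotiation in a nutshell: on the initial handshake both sides
 * propose a requested size (u_size), and min_not_zero() picks the
 * smaller non-zero one -- e.g. (hypothetical numbers) my_usize = 0
 * ("use all of the disk") and p_usize = 409600 yields 409600.  Once
 * connected, only explicit online resizes may shrink the device.
 * (Informal summary of the code above.)
 */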

static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_uuids *p = pi->data;
	u64 *p_uuid;
	int i, updated_uuids = 0;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
	if (!p_uuid) {
		drbd_err(device, "kmalloc of p_uuid failed\n");
		return -ENOMEM;
	}

	for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
		p_uuid[i] = be64_to_cpu(p->uuid[i]);

	kfree(device->p_uuid);
	device->p_uuid = p_uuid;

	if (device->state.conn < C_CONNECTED &&
	    device->state.disk < D_INCONSISTENT &&
	    device->state.role == R_PRIMARY &&
	    (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
		drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
			 (unsigned long long)device->ed_uuid);
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (get_ldev(device)) {
		int skip_initial_sync =
			device->state.conn == C_CONNECTED &&
			peer_device->connection->agreed_pro_version >= 90 &&
			device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
			(p_uuid[UI_FLAGS] & 8);
		if (skip_initial_sync) {
			drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
			drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
					"clear_n_write from receive_uuids",
					BM_LOCKED_TEST_ALLOWED);
			_drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
			_drbd_uuid_set(device, UI_BITMAP, 0);
			_drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
					CS_VERBOSE, NULL);
			drbd_md_sync(device);
			updated_uuids = 1;
		}
		put_ldev(device);
	} else if (device->state.disk < D_INCONSISTENT &&
		   device->state.role == R_PRIMARY) {
		/* I am a diskless primary, the peer just created a new current UUID
		   for me. */
		updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
	}

	/* Before we test for the disk state, we should wait until a possibly
	   ongoing cluster-wide state change has finished.  That is important if
	   we are primary and are detaching from our disk.  We need to see the
	   new disk state... */
	mutex_lock(device->state_mutex);
	mutex_unlock(device->state_mutex);
	if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
		updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);

	if (updated_uuids)
		drbd_print_uuids(device, "receiver updated UUIDs to");

	return 0;
}
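
/*
 * The skip_initial_sync shortcut above, read informally: if we are
 * already Connected, the peer speaks protocol >= 90, our current UUID
 * is still UUID_JUST_CREATED and the peer set flag bit 8 in
 * p_uuid[UI_FLAGS], then both sides agree the data is identical by
 * construction, so the bitmap is cleared and both disks jump straight
 * to UpToDate without a full initial sync.
 */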

/**
 * convert_state() - Converts the peer's view of the cluster state to our point of view
 * @ps:		The state as seen by the peer.
 */
static union drbd_state convert_state(union drbd_state ps)
{
	union drbd_state ms;

	static enum drbd_conns c_tab[] = {
		[C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
		[C_CONNECTED] = C_CONNECTED,

		[C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
		[C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
		[C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
		[C_VERIFY_S] = C_VERIFY_T,
		[C_MASK] = C_MASK,
	};

	ms.i = ps.i;

	ms.conn = c_tab[ps.conn];
	ms.peer = ps.role;
	ms.role = ps.peer;
	ms.pdsk = ps.disk;
	ms.disk = ps.pdsk;
	ms.peer_isp = (ps.aftr_isp | ps.user_isp);

	return ms;
}
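
/*
 * Perspective flip, illustrated: if the peer reports
 * { role=Primary, peer=Secondary, disk=UpToDate, pdsk=Inconsistent },
 * convert_state() yields
 * { role=Secondary, peer=Primary, disk=Inconsistent, pdsk=UpToDate },
 * i.e. the same cluster state as seen from this node.
 */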

static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	drbd_md_sync(device);

	return 0;
}
3899
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003900static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003901{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003902 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003903 union drbd_state mask, val;
3904 enum drbd_state_rv rv;
3905
3906 mask.i = be32_to_cpu(p->mask);
3907 val.i = be32_to_cpu(p->val);
3908
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003909 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3910 mutex_is_locked(&connection->cstate_mutex)) {
3911 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003912 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003913 }
3914
3915 mask = convert_state(mask);
3916 val = convert_state(val);
3917
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003918 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3919 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003920
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003921 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003922}
3923
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003924static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003925{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003926 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003927 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003928 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003929 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003930 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003931 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003932 int rv;
3933
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003934 peer_device = conn_peer_device(connection, pi->vnr);
3935 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003936 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003937 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003938
Philipp Reisnerb411b362009-09-25 16:07:19 -07003939 peer_state.i = be32_to_cpu(p->state);
3940
3941 real_peer_disk = peer_state.disk;
3942 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003943 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003944 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003945 }
3946
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003947 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003948 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003949 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003950 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951
Lars Ellenberg545752d2011-12-05 14:39:25 +01003952 /* If some other part of the code (asender thread, timeout)
3953 * already decided to close the connection again,
3954 * we must not "re-establish" it here. */
3955 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003956 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003957
Lars Ellenberg40424e42011-09-26 15:24:56 +02003958 /* If this is the "end of sync" confirmation, usually the peer disk
 3959	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For an empty resync
 3960	 * (0 bits set) started in PausedSyncT, or if the timing of pause-/
3961 * unpause-sync events has been "just right", the peer disk may
3962 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3963 */
3964 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3965 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003966 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3967 /* If we are (becoming) SyncSource, but peer is still in sync
3968 * preparation, ignore its uptodate-ness to avoid flapping, it
3969 * will change to inconsistent once the peer reaches active
3970 * syncing states.
3971 * It may have changed syncer-paused flags, however, so we
3972 * cannot ignore this completely. */
3973 if (peer_state.conn > C_CONNECTED &&
3974 peer_state.conn < C_SYNC_SOURCE)
3975 real_peer_disk = D_INCONSISTENT;
3976
3977 /* if peer_state changes to connected at the same time,
3978 * it explicitly notifies us that it finished resync.
3979 * Maybe we should finish it up, too? */
3980 else if (os.conn >= C_SYNC_SOURCE &&
3981 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003982 if (drbd_bm_total_weight(device) <= device->rs_failed)
3983 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003984 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003985 }
3986 }
3987
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003988 /* explicit verify finished notification, stop sector reached. */
3989 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3990 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003991 ov_out_of_sync_print(device);
3992 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003993 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003994 }
3995
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003996 /* peer says his disk is inconsistent, while we think it is uptodate,
3997 * and this happens while the peer still thinks we have a sync going on,
3998 * but we think we are already done with the sync.
3999 * We ignore this to avoid flapping pdsk.
 4000	 * This should not happen if the peer is a recent version of drbd. */
4001 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4002 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4003 real_peer_disk = D_UP_TO_DATE;
4004
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004005 if (ns.conn == C_WF_REPORT_PARAMS)
4006 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007
Philipp Reisner67531712010-10-27 12:21:30 +02004008 if (peer_state.conn == C_AHEAD)
4009 ns.conn = C_BEHIND;
4010
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004011 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4012 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004013 int cr; /* consider resync */
4014
4015 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004016 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004017 /* if we had an established connection
4018 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004019 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004021 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004022 /* if we have both been inconsistent, and the peer has been
4023 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004024 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025 /* if we had been plain connected, and the admin requested to
4026 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004027 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004028 (peer_state.conn >= C_STARTING_SYNC_S &&
4029 peer_state.conn <= C_WF_BITMAP_T));
4030
4031 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004032 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004033
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004034 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004035 if (ns.conn == C_MASK) {
4036 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004037 if (device->state.disk == D_NEGOTIATING) {
4038 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004039 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004040 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004042 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004043 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004044 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004045 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004046 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004047 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004048 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004049 }
4050 }
4051 }
4052
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004053 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004054 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004055 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004056 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004057 ns.peer = peer_state.role;
4058 ns.pdsk = real_peer_disk;
4059 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004060 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004061 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004062 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004063 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4064 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004065 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004066	   for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004067 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004068 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004069 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004070 drbd_uuid_new_current(device);
4071 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004072 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004073 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004074 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004075 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4076 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004077 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004078
4079 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004080 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004081 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004082 }
4083
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004084 if (os.conn > C_WF_REPORT_PARAMS) {
4085 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004086 peer_state.disk != D_NEGOTIATING ) {
4087 /* we want resync, peer has not yet decided to sync... */
4088 /* Nowadays only used when forcing a node into primary role and
4089 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004090 drbd_send_uuids(peer_device);
4091 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004092 }
4093 }
4094
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004095 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004096
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004097 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004098
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004099 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004100}
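/* Note on the retry idiom above (explanatory only, no new behaviour):
 * the state is sampled outside of req_lock because drbd_sync_handshake()
 * may sleep; the result is committed under the lock only if the state did
 * not change in the meantime, otherwise os.i != drbd_read_state(device).i
 * triggers a full re-evaluation via "goto retry". This keeps the spinlock
 * from being held across potentially sleeping calls. */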
4101
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004102static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004104 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004105 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004106 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004107
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004108 peer_device = conn_peer_device(connection, pi->vnr);
4109 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004110 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004111 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004112
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004113 wait_event(device->misc_wait,
4114 device->state.conn == C_WF_SYNC_UUID ||
4115 device->state.conn == C_BEHIND ||
4116 device->state.conn < C_CONNECTED ||
4117 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004118
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004119 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004120
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121 /* Here the _drbd_uuid_ functions are right, current should
4122 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004123 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4124 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4125 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004126
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004127 drbd_print_uuids(device, "updated sync uuid");
4128 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004130 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004131 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004132 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004133
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004134 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004135}
4136
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004137/**
 4138 * receive_bitmap_plain() - receive one plain-text chunk of the peer's bitmap
4139 *
4140 * Return 0 when done, 1 when another iteration is needed, and a negative error
4141 * code upon failure.
4142 */
4143static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004144receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004145 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004146{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004147 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004148 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004149 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004150 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004151 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004152 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004153
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004154 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004155 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004156 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004157 }
4158 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004159 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004160 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004161 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004162 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004163
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004164 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165
4166 c->word_offset += num_words;
4167 c->bit_offset = c->word_offset * BITS_PER_LONG;
4168 if (c->bit_offset > c->bm_bits)
4169 c->bit_offset = c->bm_bits;
4170
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004171 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004172}
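/* Worked example of the chunking arithmetic above (assuming a 4096 byte
 * DRBD_SOCKET_BUFFER_SIZE and a 16 byte header, as used by protocol 100):
 * data_size = 4080 bytes, so a single P_BITMAP packet carries at most
 * 4080 / sizeof(long) = 510 words on 64 bit, i.e. 32640 bitmap bits;
 * word_offset and bit_offset advance by that amount per iteration until
 * c->bm_words is exhausted. */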
4173
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004174static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4175{
4176 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4177}
4178
4179static int dcbp_get_start(struct p_compressed_bm *p)
4180{
4181 return (p->encoding & 0x80) != 0;
4182}
4183
4184static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4185{
4186 return (p->encoding >> 4) & 0x7;
4187}
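/* The three accessors above pick apart the single "encoding" byte of
 * struct p_compressed_bm; the layout, as inferred from the masks:
 *
 *	bit 7      bits 6..4     bits 3..0
 *	[start]    [pad_bits]    [  code  ]
 *
 * A matching encoder would look like this (hypothetical sketch, the name
 * is an assumption and not part of this file):
 */
static inline u8 dcbp_make_encoding_sketch(enum drbd_bitmap_code code,
					   int pad_bits, int start)
{
	return (code & 0x0f) | ((pad_bits & 0x7) << 4) | (start ? 0x80 : 0);
}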
4188
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004189/**
 4190 * recv_bm_rle_bits() - decode one RLE/VLI compressed bitmap chunk
4191 *
4192 * Return 0 when done, 1 when another iteration is needed, and a negative error
4193 * code upon failure.
4194 */
4195static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004196recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004197 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004198 struct bm_xfer_ctx *c,
4199 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004200{
4201 struct bitstream bs;
4202 u64 look_ahead;
4203 u64 rl;
4204 u64 tmp;
4205 unsigned long s = c->bit_offset;
4206 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004207 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004208 int have;
4209 int bits;
4210
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004211 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212
4213 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4214 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004215 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004216
4217 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4218 bits = vli_decode_bits(&rl, look_ahead);
4219 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004220 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221
4222 if (toggle) {
4223 e = s + rl -1;
4224 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004225 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004226 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004227 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004228 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004229 }
4230
4231 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004232 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004233 have, bits, look_ahead,
4234 (unsigned int)(bs.cur.b - p->code),
4235 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004236 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004237 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004238 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4239 if (likely(bits < 64))
4240 look_ahead >>= bits;
4241 else
4242 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004243 have -= bits;
4244
4245 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4246 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004247 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004248 look_ahead |= tmp << have;
4249 have += bits;
4250 }
4251
4252 c->bit_offset = s;
4253 bm_xfer_ctx_bit_to_word_offset(c);
4254
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004255 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004256}
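/* Decoding model of the loop above, illustrated: the payload is a
 * bitstream of VLI-encoded run lengths, alternating between runs of clear
 * and set bits; dcbp_get_start() says whether the first run is a "set"
 * run. E.g. start=0 with runs 5, 3, 7 describes bits 0..4 clear,
 * 5..7 set, 8..14 clear. The 64 bit look_ahead window is refilled after
 * each run so vli_decode_bits() always sees a complete code. */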
4257
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004258/**
 4259 * decode_bitmap_c() - dispatch decoding of a compressed bitmap packet
4260 *
4261 * Return 0 when done, 1 when another iteration is needed, and a negative error
4262 * code upon failure.
4263 */
4264static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004265decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004266 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004267 struct bm_xfer_ctx *c,
4268 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004269{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004270 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004271 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272
4273 /* other variants had been implemented for evaluation,
4274 * but have been dropped as this one turned out to be "best"
4275 * during all our tests. */
4276
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004277 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4278 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004279 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004280}
4281
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004282void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283 const char *direction, struct bm_xfer_ctx *c)
4284{
4285 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004286 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004287 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4288 unsigned int plain =
4289 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4290 c->bm_words * sizeof(unsigned long);
4291 unsigned int total = c->bytes[0] + c->bytes[1];
4292 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004293
 4294	/* total cannot be zero, but just in case: */
4295 if (total == 0)
4296 return;
4297
4298 /* don't report if not compressed */
4299 if (total >= plain)
4300 return;
4301
 4302	/* total < plain; still check for overflow */
4303 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4304 : (1000 * total / plain);
4305
4306 if (r > 1000)
4307 r = 1000;
4308
4309 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004310 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004311 "total %u; compression: %u.%u%%\n",
4312 direction,
4313 c->bytes[1], c->packets[1],
4314 c->bytes[0], c->packets[0],
4315 total, r/10, r % 10);
4316}
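/* Worked example of the permille math above (illustrative numbers):
 * plain = 100000, total = 12345 gives r = 1000 * 12345 / 100000 = 123,
 * so 1000 - r = 877 and the log line reads "compression: 87.7%".
 * The total > UINT_MAX/1000 branch merely avoids 32 bit overflow of the
 * multiplication for very large transfers. */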
4317
4318/* Since we are processing the bitfield from lower addresses to higher,
 4319   it does not matter whether we process it in 32 bit or 64 bit
 4320   chunks, as long as it is little endian. (Understand it as a byte stream,
 4321   beginning with the lowest byte...) If we used big endian,
 4322   we would have to process it from the highest address to the lowest
 4323   in order to stay agnostic to the 32 vs 64 bit issue.
4324
4325 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004326static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004327{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004328 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004329 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004331 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004332
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004333 peer_device = conn_peer_device(connection, pi->vnr);
4334 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004335 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004336 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004337
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004338 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004339 /* you are supposed to send additional out-of-sync information
4340 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004341
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004343 .bm_bits = drbd_bm_bits(device),
4344 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345 };
4346
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004347 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004348 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004349 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004350 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004351 /* MAYBE: sanity check that we speak proto >= 90,
4352 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004353 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004355 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004356 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004357 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358 goto out;
4359 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004360 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004361 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004362 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004363 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004365 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004366 if (err)
4367 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004368 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004369 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004370 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004371 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 goto out;
4373 }
4374
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004375 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004376 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004377
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004378 if (err <= 0) {
4379 if (err < 0)
4380 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004381 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004382 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004383 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004384 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004385 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004386 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004388 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004389
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004390 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004391 enum drbd_state_rv rv;
4392
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004393 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004394 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004395 goto out;
4396 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004397 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004398 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004399 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004400 /* admin may have requested C_DISCONNECTING,
4401 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004402 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004403 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004404 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004405 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004408 drbd_bm_unlock(device);
4409 if (!err && device->state.conn == C_WF_BITMAP_S)
4410 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004411 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004412}
4413
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004414static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004416 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004417 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004419 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004420}
4421
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004422static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004424 /* Make sure we've acked all the TCP data associated
4425 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004426 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004428 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429}
4430
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004431static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004432{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004433 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004434 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004435 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004436
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004437 peer_device = conn_peer_device(connection, pi->vnr);
4438 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004439 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004440 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004441
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004442 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004443 case C_WF_SYNC_UUID:
4444 case C_WF_BITMAP_T:
4445 case C_BEHIND:
4446 break;
4447 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004448 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004449 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004450 }
4451
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004452 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004453
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004454 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004455}
4456
Philipp Reisner02918be2010-08-20 14:35:10 +02004457struct data_cmd {
4458 int expect_payload;
4459 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004460 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004461};
4462
Philipp Reisner02918be2010-08-20 14:35:10 +02004463static struct data_cmd drbd_cmd_handler[] = {
4464 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4465 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4466 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4467 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004468 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4469 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4470 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004471 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4472 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004473 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4474 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004475 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4476 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4477 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4478 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4479 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4480 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4481 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4482 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4483 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4484 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004485 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004486 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004487 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004488 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004489};
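/* Framing contract captured by the table above (summary, no new
 * behaviour): drbdd() below first reads the fixed sub-header of pkt_size
 * bytes into the preallocated socket buffer; only commands with
 * expect_payload set may carry data beyond that, which the handler
 * consumes itself. A cmd outside the table, or an unexpected payload,
 * tears the connection down via C_PROTOCOL_ERROR. */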
4490
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004491static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004492{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004493 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004494 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004495 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004496
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004497 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004498 struct data_cmd *cmd;
4499
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004500 drbd_thread_current_set_cpu(&connection->receiver);
4501 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004502 goto err_out;
4503
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004504 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004505 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004506 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004507 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004508 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004509 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004510
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004511 shs = cmd->pkt_size;
4512 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004513 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004514 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004515 goto err_out;
4516 }
4517
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004518 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004519 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004520 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004521 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004522 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004523 }
4524
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004525 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004526 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004527 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004528 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004529 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004530 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004531 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004532 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004534 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004535 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004536}
4537
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004538static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004539{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004540 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004541 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004542 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004543
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004544 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004545 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004546
Lars Ellenberg545752d2011-12-05 14:39:25 +01004547 /* We are about to start the cleanup after connection loss.
4548 * Make sure drbd_make_request knows about that.
4549 * Usually we should be in some network failure state already,
4550 * but just in case we are not, we fix it up here.
4551 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004552 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004553
Philipp Reisnerb411b362009-09-25 16:07:19 -07004554 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004555 drbd_thread_stop(&connection->asender);
4556 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004557
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004558 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004559 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4560 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004561 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004562 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004563 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004564 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004565 rcu_read_lock();
4566 }
4567 rcu_read_unlock();
4568
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004569 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004570 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004571 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004572 atomic_set(&connection->current_epoch->epoch_size, 0);
4573 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004574
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004575 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004576
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004577 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4578 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004579
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004580 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004581 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004582 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004583 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004584
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004585 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004586
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004587 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004588 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004589}
4590
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004591static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004592{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004593 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004594 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595
Philipp Reisner85719572010-07-21 10:20:17 +02004596 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004597 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004598 _drbd_wait_ee_list_empty(device, &device->active_ee);
4599 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4600 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004601 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004602
4603 /* We do not have data structures that would allow us to
4604 * get the rs_pending_cnt down to 0 again.
4605 * * On C_SYNC_TARGET we do not have any data structures describing
4606 * the pending RSDataRequest's we have sent.
4607 * * On C_SYNC_SOURCE there is no data structure that tracks
4608 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4609 * And no, it is not the sum of the reference counts in the
4610 * resync_LRU. The resync_LRU tracks the whole operation including
4611 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4612 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004613 drbd_rs_cancel_all(device);
4614 device->rs_total = 0;
4615 device->rs_failed = 0;
4616 atomic_set(&device->rs_pending_cnt, 0);
4617 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004618
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004619 del_timer_sync(&device->resync_timer);
4620 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004621
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4623 * w_make_resync_request etc. which may still be on the worker queue
4624 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004625 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004626
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004627 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004629 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
 4630	   might have queued work again. The one before drbd_finish_peer_reqs() is
 4631	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004632 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004633
Lars Ellenberg08332d72012-08-17 15:09:13 +02004634 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4635 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004636 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004638 kfree(device->p_uuid);
4639 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004641 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004642 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004644 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004645
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004646 /* serialize with bitmap writeout triggered by the state change,
4647 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004648 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004649
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650 /* tcp_close and release of sendpage pages can be deferred. I don't
4651 * want to use SO_LINGER, because apparently it can be deferred for
4652 * more than 20 seconds (longest time I checked).
4653 *
4654 * Actually we don't care for exactly when the network stack does its
4655 * put_page(), but release our reference on these pages right here.
4656 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004657 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004658 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004659 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004660 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004661 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004662 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004663 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004664 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004665 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004666
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004667 D_ASSERT(device, list_empty(&device->read_ee));
4668 D_ASSERT(device, list_empty(&device->active_ee));
4669 D_ASSERT(device, list_empty(&device->sync_ee));
4670 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004671
Philipp Reisner360cc742011-02-08 14:29:53 +01004672 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004673}
4674
4675/*
4676 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4677 * we can agree on is stored in agreed_pro_version.
4678 *
 4679 * feature flags and the reserved array should leave enough room for future
4680 * enhancements of the handshake protocol, and possible plugins...
4681 *
4682 * for now, they are expected to be zero, but ignored.
4683 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004684static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004685{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004686 struct drbd_socket *sock;
4687 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004689 sock = &connection->data;
4690 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004691 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004692 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004693 memset(p, 0, sizeof(*p));
4694 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4695 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004696 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004697 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004698}
4699
4700/*
4701 * return values:
4702 * 1 yes, we have a valid connection
4703 * 0 oops, did not work out, please try again
4704 * -1 peer talks different language,
4705 * no point in trying again, please go standalone.
4706 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004707static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004708{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004709 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004710 struct p_connection_features *p;
4711 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004712 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004713 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004714
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004715 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004716 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717 return 0;
4718
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004719 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004720 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004721 return 0;
4722
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004723 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004724 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004725 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004726 return -1;
4727 }
4728
Philipp Reisner77351055b2011-02-07 17:24:26 +01004729 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004730 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004731 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732 return -1;
4733 }
4734
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004735 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004736 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004737 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004738 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004739
Philipp Reisnerb411b362009-09-25 16:07:19 -07004740 p->protocol_min = be32_to_cpu(p->protocol_min);
4741 p->protocol_max = be32_to_cpu(p->protocol_max);
4742 if (p->protocol_max == 0)
4743 p->protocol_max = p->protocol_min;
4744
4745 if (PRO_VERSION_MAX < p->protocol_min ||
4746 PRO_VERSION_MIN > p->protocol_max)
4747 goto incompat;
4748
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004749 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004750 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004751
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004752 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004753 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004754
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004755 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4756 connection->agreed_features & FF_TRIM ? " " : " not ");
4757
Philipp Reisnerb411b362009-09-25 16:07:19 -07004758 return 1;
4759
4760 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004761 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004762 "I support %d-%d, peer supports %d-%d\n",
4763 PRO_VERSION_MIN, PRO_VERSION_MAX,
4764 p->protocol_min, p->protocol_max);
4765 return -1;
4766}
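/* Negotiation in a nutshell: each side advertises [protocol_min,
 * protocol_max] plus feature bits; the ranges must overlap, the agreed
 * version is min(our max, peer max), and the feature sets are ANDed so a
 * feature is only used when both sides offer it. A condensed sketch of
 * the same version check (hypothetical helper, name is an assumption):
 */
static inline int agreed_pro_version_sketch(int peer_min, int peer_max)
{
	if (PRO_VERSION_MAX < peer_min || PRO_VERSION_MIN > peer_max)
		return -1;	/* incompatible dialects, go standalone */
	return min_t(int, PRO_VERSION_MAX, peer_max);
}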
4767
4768#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004769static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004770{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004771	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4772 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004773 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004774}
4775#else
4776#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004777
4778/* Return value:
4779 1 - auth succeeded,
4780 0 - failed, try again (network error),
4781 -1 - auth failed, don't try again.
4782*/
4783
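/* CRAM-HMAC exchange sketch (both peers run the same sequence):
 *   1. send P_AUTH_CHALLENGE carrying CHALLENGE_LEN random bytes
 *   2. receive the peer's challenge and sanity-check its length
 *   3. reply with P_AUTH_RESPONSE = HMAC(shared_secret, peer's challenge)
 *   4. verify the peer's response against HMAC(shared_secret, own challenge)
 * Steps 1 and 2 appear below; 3 and 4 follow further down in this
 * function. */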
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer. */

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	if (pi.size < CHALLENGE_LEN) {
		drbd_err(connection, "AuthChallenge payload too small.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

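	/* A challenge identical to our own would let the peer simply echo
	 * the response we are about to compute back at us, "proving"
	 * knowledge of the secret without having it.  Refuse such
	 * reflection. */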
	if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
		drbd_err(connection, "Peer presented the same challenge!\n");
		rv = -1;
		goto fail;
	}

	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif

int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

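	/* Judging from the handling below, conn_connect() follows the
	 * usual convention in this file: > 0 for an established
	 * connection, 0 for a transient failure worth retrying after a
	 * short sleep, -1 when the handshake outcome invalidates the
	 * network configuration. */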
	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

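	/* Peers speaking a protocol before 100 have no dedicated
	 * connection-wide reply packet; if a connection-wide state change
	 * is pending, this P_STATE_CHG_REPLY is the answer to it, so hand
	 * it to the connection-level handler above. */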
	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int * HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

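/* P_RS_IS_IN_SYNC: during checksum-based resync (protocol 89 and up)
 * the peer found the block's digest unchanged, so the block can be
 * marked in sync without retransmitting any data. */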
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

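/* Look up the drbd_request a peer ACK refers to and feed the matching
 * event into the request state machine.  The lookup and __req_mod()
 * run under req_lock; if the transition made the master bio
 * completable, it is completed after the lock is dropped. */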
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

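	/* Resync acks carry ID_SYNCER instead of a request id: there is
	 * no drbd_request to transition, only the bitmap and the
	 * rs_pending counter to update. */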
	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
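			/* fall through - a cancelled resync request needs
			 * no accounting beyond the dec_rs_pending() above */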
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

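/* A barrier ack releases the corresponding epoch from the transfer
 * log.  It is also the natural point to leave Ahead mode: once nothing
 * is in flight any more, volumes that fell behind schedule a
 * transition back towards SyncSource. */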
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
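	/* ov_left counts bitmap blocks (BM_BLOCK_SIZE, 4 KiB each, if
	 * this comment's assumption holds); bit 9 flips every 512 blocks,
	 * i.e. every 2 MiB of verified data. */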
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

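/* Drain the done_ee lists of all volumes.  Handling completions can be
 * interrupted by signals and may queue further work, so loop until a
 * scan under req_lock finds every done_ee list empty. */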
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}

struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static struct asender_cmd asender_tbl[] = {
	[P_PING]	      = { 0, got_Ping },
	[P_PING_ACK]	      = { 0, got_PingAck },
	[P_RECV_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	      = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	      = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]     = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	      = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	      = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY]   = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]     = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]	      = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	      = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	      = { sizeof(struct p_block_ack), got_BlockAck },
};

int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct net_conf *nc;
	int ping_timeo, tcp_cork, ping_int;
	struct sched_param param = { .sched_priority = 2 };

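	/* Run as a low-priority real-time task: acks and pings must still
	 * go out promptly on a loaded box, otherwise the peer's ping
	 * timeout would declare this node dead. */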
	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

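		/* While a ping is in flight, shrink the receive timeout to
		 * ping_timeo (configured in tenths of a second) so a
		 * missing PingAck is noticed quickly; it is restored to
		 * ping_int seconds once the ack arrives. */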
		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS	 (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf	 += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;
				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				       jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			/* range-check the command before indexing the table */
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !asender_tbl[pi.cmd].fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			cmd = &asender_tbl[pi.cmd];
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			bool err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}