blob: 7da83f3a61eb392e666d5b12ca5e682f1906e2ac [file] [log] [blame]
/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070049#include "drbd_vli.h"
50
Lars Ellenberg20c68fd2014-04-28 18:43:25 +020051#define PRO_FEATURES (FF_TRIM)
52
Philipp Reisner77351055b2011-02-07 17:24:26 +010053struct packet_info {
54 enum drbd_packet cmd;
Andreas Gruenbachere2857212011-03-25 00:57:38 +010055 unsigned int size;
56 unsigned int vnr;
Andreas Gruenbachere6589832011-03-30 12:54:42 +020057 void *data;
Philipp Reisner77351055b2011-02-07 17:24:26 +010058};
59
/* Outcome of trying to finish a write epoch. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch still has users, nothing happened */
	FE_DESTROYED,	/* epoch was freed */
	FE_RECYCLED,	/* epoch object was reused for the next epoch */
};
65
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020066static int drbd_do_features(struct drbd_connection *connection);
67static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020068static int drbd_disconnected(struct drbd_peer_device *);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +020069static void conn_wait_active_ee_empty(struct drbd_connection *connection);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020070static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010071static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
Philipp Reisnerb411b362009-09-25 16:07:19 -070073
74#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
75
Lars Ellenberg45bb9122010-05-14 17:10:48 +020076/*
77 * some helper functions to deal with single linked page lists,
78 * page->private being our "next" pointer.
79 */
80
81/* If at least n pages are linked at head, get n pages off.
82 * Otherwise, don't modify head, and return NULL.
83 * Locking is the responsibility of the caller.
84 */
85static struct page *page_chain_del(struct page **head, int n)
86{
87 struct page *page;
88 struct page *tmp;
89
90 BUG_ON(!n);
91 BUG_ON(!head);
92
93 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020094
95 if (!page)
96 return NULL;
97
Lars Ellenberg45bb9122010-05-14 17:10:48 +020098 while (page) {
99 tmp = page_chain_next(page);
100 if (--n == 0)
101 break; /* found sufficient pages */
102 if (tmp == NULL)
103 /* insufficient pages, don't use any of them. */
104 return NULL;
105 page = tmp;
106 }
107
108 /* add end of list marker for the returned list */
109 set_page_private(page, 0);
110 /* actual return value, and adjustment of head */
111 page = *head;
112 *head = tmp;
113 return page;
114}
115
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	int count = 1;
	struct page *next;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
129
130static int page_chain_free(struct page *page)
131{
132 struct page *tmp;
133 int i = 0;
134 page_chain_for_each_safe(page, tmp) {
135 put_page(page);
136 ++i;
137 }
138 return i;
139}
140
141static void page_chain_add(struct page **head,
142 struct page *chain_first, struct page *chain_last)
143{
144#if 1
145 struct page *tmp;
146 tmp = page_chain_tail(chain_first, NULL);
147 BUG_ON(tmp != chain_last);
148#endif
149
150 /* add chain to head */
151 set_page_private(chain_last, (unsigned long)*head);
152 *head = chain_first;
153}
154
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200155static struct page *__drbd_alloc_pages(struct drbd_device *device,
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200156 unsigned int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700157{
158 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200159 struct page *tmp = NULL;
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200160 unsigned int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700161
162 /* Yes, testing drbd_pp_vacant outside the lock is racy.
163 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200164 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700165 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200166 page = page_chain_del(&drbd_pp_pool, number);
167 if (page)
168 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700169 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200170 if (page)
171 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700172 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200173
Philipp Reisnerb411b362009-09-25 16:07:19 -0700174 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
175 * "criss-cross" setup, that might cause write-out on some other DRBD,
176 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200177 for (i = 0; i < number; i++) {
178 tmp = alloc_page(GFP_TRY);
179 if (!tmp)
180 break;
181 set_page_private(tmp, (unsigned long)page);
182 page = tmp;
183 }
184
185 if (i == number)
186 return page;
187
188 /* Not enough pages immediately available this time.
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200189 * No need to jump around here, drbd_alloc_pages will retry this
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200190 * function "soon". */
191 if (page) {
192 tmp = page_chain_tail(page, NULL);
193 spin_lock(&drbd_pp_lock);
194 page_chain_add(&drbd_pp_pool, page, tmp);
195 drbd_pp_vacant += i;
196 spin_unlock(&drbd_pp_lock);
197 }
198 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700199}
200
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200201static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200202 struct list_head *to_be_freed)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700203{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200204 struct drbd_peer_request *peer_req, *tmp;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205
206 /* The EEs are always appended to the end of the list. Since
207 they are sent in order over the wire, they have to finish
208 in order. As soon as we see the first not finished we can
209 stop to examine the list... */
210
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200211 list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200212 if (drbd_peer_req_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700213 break;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200214 list_move(&peer_req->w.list, to_be_freed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 }
216}
217
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200218static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700219{
220 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100221 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200223 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200224 reclaim_finished_net_peer_reqs(device, &reclaimed);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200225 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200228 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700229}
230
231/**
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200233 * @device: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700236 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200237 * Tries to allocate number pages, first from our own page pool, then from
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200238 * the kernel.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Possibly retry until DRBD frees sufficient pages somewhere else.
240 *
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200241 * If this allocation would exceed the max_buffers setting, we throttle
242 * allocation (schedule_timeout) to give the system some room to breathe.
243 *
244 * We do not use max-buffers as hard limit, because it could lead to
245 * congestion and further to a distributed deadlock during online-verify or
246 * (checksum based) resync, if the max-buffers, socket buffer sizes and
247 * resync-rate settings are mis-configured.
248 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200249 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700250 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200251struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200252 bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700253{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200254 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700255 struct page *page = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200256 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257 DEFINE_WAIT(wait);
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200258 unsigned int mxb;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700259
Philipp Reisner44ed1672011-04-19 17:10:19 +0200260 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200261 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200262 mxb = nc ? nc->max_buffers : 1000000;
263 rcu_read_unlock();
264
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200265 if (atomic_read(&device->pp_in_use) < mxb)
266 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700267
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200268 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700269 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
270
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200271 drbd_kick_lo_and_reclaim_net(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700272
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200273 if (atomic_read(&device->pp_in_use) < mxb) {
274 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700275 if (page)
276 break;
277 }
278
279 if (!retry)
280 break;
281
282 if (signal_pending(current)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200283 drbd_warn(device, "drbd_alloc_pages interrupted!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700284 break;
285 }
286
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200287 if (schedule_timeout(HZ/10) == 0)
288 mxb = UINT_MAX;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700289 }
290 finish_wait(&drbd_pp_wait, &wait);
291
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200292 if (page)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200293 atomic_add(number, &device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700294 return page;
295}
296
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200297/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200298 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200299 * Either links the page chain back to the global pool,
300 * or returns all pages to the system. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200301static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700302{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200303 atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700304 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200305
Lars Ellenberga73ff322012-06-25 19:15:38 +0200306 if (page == NULL)
307 return;
308
Philipp Reisner81a5d602011-02-22 19:53:16 -0500309 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200310 i = page_chain_free(page);
311 else {
312 struct page *tmp;
313 tmp = page_chain_tail(page, &i);
314 spin_lock(&drbd_pp_lock);
315 page_chain_add(&drbd_pp_pool, page, tmp);
316 drbd_pp_vacant += i;
317 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700318 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200319 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200320 if (i < 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200321 drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
Lars Ellenberg435f0742010-09-06 12:30:25 +0200322 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700323 wake_up(&drbd_pp_wait);
324}
325
/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100340struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200341drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200342 unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700343{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200344 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100345 struct drbd_peer_request *peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +0200346 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200347 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700348
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200349 if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700350 return NULL;
351
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100352 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
353 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700354 if (!(gfp_mask & __GFP_NOWARN))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200355 drbd_err(device, "%s: allocation failed\n", __func__);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700356 return NULL;
357 }
358
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200359 if (has_payload && data_size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200360 page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
Lars Ellenberga73ff322012-06-25 19:15:38 +0200361 if (!page)
362 goto fail;
363 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700364
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100365 drbd_clear_interval(&peer_req->i);
366 peer_req->i.size = data_size;
367 peer_req->i.sector = sector;
368 peer_req->i.local = false;
369 peer_req->i.waiting = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700370
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100371 peer_req->epoch = NULL;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200372 peer_req->peer_device = peer_device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100373 peer_req->pages = page;
374 atomic_set(&peer_req->pending_bios, 0);
375 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100376 /*
377 * The block_id is opaque to the receiver. It is not endianness
378 * converted, and sent back to the sender unchanged.
379 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100380 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700381
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100382 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200384 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100385 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700386 return NULL;
387}
388
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200389void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100390 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100392 if (peer_req->flags & EE_HAS_DIGEST)
393 kfree(peer_req->digest);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200394 drbd_free_pages(device, peer_req->pages, is_net);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200395 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
396 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100397 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700398}
399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200400int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700401{
402 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100403 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700404 int count = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200405 int is_net = list == &device->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700406
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200407 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700408 list_splice_init(list, &work_list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200409 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200411 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200412 __drbd_free_peer_req(device, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413 count++;
414 }
415 return count;
416}
417
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418/*
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200419 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700420 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200421static int drbd_finish_peer_reqs(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700422{
423 LIST_HEAD(work_list);
424 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100425 struct drbd_peer_request *peer_req, *t;
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100426 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700427
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200428 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200429 reclaim_finished_net_peer_reqs(device, &reclaimed);
430 list_splice_init(&device->done_ee, &work_list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200431 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700432
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200433 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200434 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700435
436 /* possible callbacks here:
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +0200437 * e_end_block, and e_end_resync_block, e_send_superseded.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700438 * all ignore the last argument.
439 */
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200440 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100441 int err2;
442
Philipp Reisnerb411b362009-09-25 16:07:19 -0700443 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200444 err2 = peer_req->w.cb(&peer_req->w, !!err);
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100445 if (!err)
446 err = err2;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200447 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700448 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200449 wake_up(&device->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700450
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100451 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700452}
453
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200454static void _drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200455 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700456{
457 DEFINE_WAIT(wait);
458
459 /* avoids spin_lock/unlock
460 * and calling prepare_to_wait in the fast path */
461 while (!list_empty(head)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200462 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200463 spin_unlock_irq(&device->resource->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100464 io_schedule();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200465 finish_wait(&device->ee_wait, &wait);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200466 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700467 }
468}
469
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200470static void drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200471 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700472{
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200473 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200474 _drbd_wait_ee_list_empty(device, head);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200475 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700476}
477
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100478static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700479{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700480 struct kvec iov = {
481 .iov_base = buf,
482 .iov_len = size,
483 };
484 struct msghdr msg = {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700485 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
486 };
Al Virof730c842014-02-08 21:07:38 -0500487 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700488}
489
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200490static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700491{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700492 int rv;
493
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200494 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700495
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200496 if (rv < 0) {
497 if (rv == -ECONNRESET)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200498 drbd_info(connection, "sock was reset by peer\n");
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200499 else if (rv != -ERESTARTSYS)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200500 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200501 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200502 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200503 long t;
504 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200505 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200506 rcu_read_unlock();
507
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200508 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200509
Philipp Reisner599377a2012-08-17 14:50:22 +0200510 if (t)
511 goto out;
512 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200513 drbd_info(connection, "sock was shut down by peer\n");
Philipp Reisner599377a2012-08-17 14:50:22 +0200514 }
515
Philipp Reisnerb411b362009-09-25 16:07:19 -0700516 if (rv != size)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200517 conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700518
Philipp Reisner599377a2012-08-17 14:50:22 +0200519out:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700520 return rv;
521}
522
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200523static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100524{
525 int err;
526
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200527 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100528 if (err != size) {
529 if (err >= 0)
530 err = -EIO;
531 } else
532 err = 0;
533 return err;
534}
535
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200536static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100537{
538 int err;
539
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200540 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100541 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200542 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100543 return err;
544}
545
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200546/* quoting tcp(7):
547 * On individual connections, the socket buffer size must be set prior to the
548 * listen(2) or connect(2) calls in order to have it take effect.
549 * This is our wrapper to do so.
550 */
551static void drbd_setbufsize(struct socket *sock, unsigned int snd,
552 unsigned int rcv)
553{
554 /* open coded SO_SNDBUF, SO_RCVBUF */
555 if (snd) {
556 sock->sk->sk_sndbuf = snd;
557 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
558 }
559 if (rcv) {
560 sock->sk->sk_rcvbuf = rcv;
561 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
562 }
563}
564
/* Actively try to establish a TCP connection to the peer.
 * Returns the connected socket, or NULL on failure.  Transient errors
 * (timeout, peer not yet reachable) do not force a disconnect; unexpected
 * errors move the connection state to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the tunables under RCU */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
652
/* Context for waiting on an incoming connection: prepare_listen_socket()
 * diverts the listen socket's sk_state_change callback to
 * drbd_incoming_connection(), which completes door_bell once a peer has
 * connected.  Lives on the stack of conn_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;	/* connection we are accepting for */
	struct socket *s_listen;		/* the listening socket */
	struct completion door_bell;		/* completed on TCP_ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored by unregister_state_change() */

};
660
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200661static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700662{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200663 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200664 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200665
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200666 state_change = ad->original_sk_state_change;
667 if (sk->sk_state == TCP_ESTABLISHED)
668 complete(&ad->door_bell);
669 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200670}
671
/* Create a TCP listen socket bound to our configured local address and
 * divert its sk_state_change callback so an incoming connection wakes
 * the waiter via ad->door_bell.  On success the socket is stored in
 * ad->s_listen and 0 is returned; on failure the socket is released and
 * -EIO is returned. */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	/* Snapshot the tunables under RCU; net_conf may be gone if the
	 * connection is being torn down concurrently. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	/* sockaddr_in6 is large enough to also hold an AF_INET address */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	/* "what" names the last attempted step for the error message below */
	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
		SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Divert the state-change callback under sk_callback_lock; done
	 * before listen() so no established connection can be missed. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* transient conditions are expected and stay quiet */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
734
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200735static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
736{
737 write_lock_bh(&sk->sk_callback_lock);
738 sk->sk_state_change = ad->original_sk_state_change;
739 sk->sk_user_data = NULL;
740 write_unlock_bh(&sk->sk_callback_lock);
741}
742
/* Wait (up to connect_int seconds, with random jitter) for the door bell
 * rung by drbd_incoming_connection(), then accept the pending connection.
 * Returns the established socket (with its state-change callback already
 * restored), or NULL on timeout, signal, missing net_conf, or accept
 * failure. */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	/* <= 0 covers both timeout (0) and -ERESTARTSYS */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		/* transient conditions are expected and stay quiet */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* the accepted socket inherited the diverted callback; undo that */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
779
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200780static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700781
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200783 enum drbd_packet cmd)
784{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200785 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200786 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200787 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700788}
789
/* Read and decode the first packet on a freshly accepted socket.
 * Returns the packet's command code (expected: P_INITIAL_DATA or
 * P_INITIAL_META), or a negative error code. */
static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	struct net_conf *nc;
	int err;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	/* bound the wait for the first packet by 4 * ping_timeo; the
	 * HZ / 10 suggests ping_timeo is in 0.1s units — matches the
	 * same expression in conn_connect() */
	sock->sk->sk_rcvtimeo = nc->ping_timeo * 4 * HZ / 10;
	rcu_read_unlock();

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		/* a short read is still a failure here */
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}
817
818/**
819 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820 * @sock: pointer to the pointer to the socket.
821 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100822static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700823{
824 int rr;
825 char tb[4];
826
827 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100828 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700829
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100830 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700831
832 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100833 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700834 } else {
835 sock_release(*sock);
836 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100837 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700838 }
839}
/* Gets called if a connection is established, or if a new minor gets created
   in a connection.  Resets per-peer sequence state, selects the proper
   state mutex and performs the per-device handshake sends.  Returns 0 on
   success or the first send error. */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	/* fresh connection: restart packet/peer sequence counters */
	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	/* before protocol 100 there is only one connection-wide state
	 * mutex; newer peers get a per-device one */
	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	/* handshake sends, stopping at the first failure */
	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700867
/*
 * conn_connect() - establish the pair of sockets (data + meta) to the peer
 *
 * Both nodes connect and listen simultaneously; whichever connect attempt
 * succeeds first supplies a socket, and crossed attempts are resolved by
 * the P_INITIAL_DATA / P_INITIAL_META first packets plus a coin flip.
 *
 * return values:
 * 1 yes, we have a valid connection
 * 0 oops, did not work out, please try again
 * -1 peer talks different language,
 * no point in trying again, please go standalone.
 * -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* "sock" is the data socket, "msock" the meta-data socket */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* loop until both sockets are established (or we give up) */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			/* outgoing connects fill data socket first, then meta */
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			/* give crossed packets a chance to arrive, then
			 * verify both sockets are still healthy */
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once; drop
					 * ours, keep theirs, maybe retry */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* coin flip breaks the symmetry of crossed
				 * connection attempts */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	/* both sockets established; the listen socket is no longer needed */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	/* avoid memory-reclaim deadlocks on the block-IO path */
	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	/* handshake done; switch the data socket to its normal timeouts */
	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* per-device post-connect work; kref pins each device across the
	 * rcu_read_unlock/lock window */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1106
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001107static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001108{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001109 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001110
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001111 if (header_size == sizeof(struct p_header100) &&
1112 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1113 struct p_header100 *h = header;
1114 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001115 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001116 return -EINVAL;
1117 }
1118 pi->vnr = be16_to_cpu(h->volume);
1119 pi->cmd = be16_to_cpu(h->command);
1120 pi->size = be32_to_cpu(h->length);
1121 } else if (header_size == sizeof(struct p_header95) &&
1122 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001123 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001124 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001125 pi->size = be32_to_cpu(h->length);
1126 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001127 } else if (header_size == sizeof(struct p_header80) &&
1128 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1129 struct p_header80 *h = header;
1130 pi->cmd = be16_to_cpu(h->command);
1131 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001132 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001133 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001134 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001135 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001136 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001137 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001139 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001140 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001141}
1142
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001143static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001144{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001145 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001146 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001147
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001148 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001149 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001150 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001151
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001152 err = decode_header(connection, buffer, pi);
1153 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001154
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001155 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001156}
1157
/* Issue a cache flush on every attached backing device of this connection,
 * if the configured write ordering requires it.  On the first flush
 * failure the write ordering is downgraded to WO_drain_io and the loop
 * stops. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			if (!get_ldev(device))
				continue;
			/* pin the device so we may drop the RCU lock while
			 * doing blocking I/O */
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
						GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, NULL, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1193
1194/**
1195 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001196 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001197 * @epoch: Epoch object.
1198 * @ev: Epoch event.
1199 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001200static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201 struct drbd_epoch *epoch,
1202 enum epoch_event ev)
1203{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001204 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001206 enum finish_epoch rv = FE_STILL_LIVE;
1207
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001208 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001209 do {
1210 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001211
1212 epoch_size = atomic_read(&epoch->epoch_size);
1213
1214 switch (ev & ~EV_CLEANUP) {
1215 case EV_PUT:
1216 atomic_dec(&epoch->active);
1217 break;
1218 case EV_GOT_BARRIER_NR:
1219 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001220 break;
1221 case EV_BECAME_LAST:
1222 /* nothing to do*/
1223 break;
1224 }
1225
Philipp Reisnerb411b362009-09-25 16:07:19 -07001226 if (epoch_size != 0 &&
1227 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001228 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001229 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001230 spin_unlock(&connection->epoch_lock);
1231 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1232 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001234#if 0
1235 /* FIXME: dec unacked on connection, once we have
1236 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001237 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001238 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001239#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001240
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001241 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001242 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1243 list_del(&epoch->list);
1244 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001245 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246 kfree(epoch);
1247
1248 if (rv == FE_STILL_LIVE)
1249 rv = FE_DESTROYED;
1250 } else {
1251 epoch->flags = 0;
1252 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001253 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001254 if (rv == FE_STILL_LIVE)
1255 rv = FE_RECYCLED;
1256 }
1257 }
1258
1259 if (!next_epoch)
1260 break;
1261
1262 epoch = next_epoch;
1263 } while (1);
1264
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001265 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267 return rv;
1268}
1269
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001270static enum write_ordering_e
1271max_allowed_wo(struct drbd_backing_dev *bdev, enum write_ordering_e wo)
1272{
1273 struct disk_conf *dc;
1274
1275 dc = rcu_dereference(bdev->disk_conf);
1276
1277 if (wo == WO_bdev_flush && !dc->disk_flushes)
1278 wo = WO_drain_io;
1279 if (wo == WO_drain_io && !dc->disk_drain)
1280 wo = WO_none;
1281
1282 return wo;
1283}
1284
Philipp Reisnerb411b362009-09-25 16:07:19 -07001285/**
1286 * drbd_bump_write_ordering() - Fall back to an other write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001287 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001288 * @wo: Write ordering method to try.
1289 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001290void drbd_bump_write_ordering(struct drbd_resource *resource, struct drbd_backing_dev *bdev,
1291 enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001292{
Philipp Reisnere9526582013-11-22 15:53:41 +01001293 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001294 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001295 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001296 static char *write_ordering_str[] = {
1297 [WO_none] = "none",
1298 [WO_drain_io] = "drain",
1299 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001300 };
1301
Philipp Reisnere9526582013-11-22 15:53:41 +01001302 pwo = resource->write_ordering;
Lars Ellenberg70df7092013-12-20 11:17:02 +01001303 if (wo != WO_bdev_flush)
1304 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001305 rcu_read_lock();
Philipp Reisnere9526582013-11-22 15:53:41 +01001306 idr_for_each_entry(&resource->devices, device, vnr) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001307 if (get_ldev(device)) {
1308 wo = max_allowed_wo(device->ldev, wo);
1309 if (device->ldev == bdev)
1310 bdev = NULL;
1311 put_ldev(device);
1312 }
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001313 }
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01001314
1315 if (bdev)
1316 wo = max_allowed_wo(bdev, wo);
1317
Lars Ellenberg70df7092013-12-20 11:17:02 +01001318 rcu_read_unlock();
1319
Philipp Reisnere9526582013-11-22 15:53:41 +01001320 resource->write_ordering = wo;
1321 if (pwo != resource->write_ordering || wo == WO_bdev_flush)
1322 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001323}
1324
1325/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001326 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001327 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001328 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001329 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001330 *
1331 * May spread the pages to multiple bios,
1332 * depending on bio_add_page restrictions.
1333 *
1334 * Returns 0 if all bios have been submitted,
1335 * -ENOMEM if we could not allocate enough bios,
1336 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1337 * single page to an empty bio (which should never happen and likely indicates
1338 * that the lower level IO stack is in some way broken). This has been observed
1339 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001340 */
1341/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001342int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001343 struct drbd_peer_request *peer_req,
1344 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001345{
1346 struct bio *bios = NULL;
1347 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001348 struct page *page = peer_req->pages;
1349 sector_t sector = peer_req->i.sector;
1350 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001351 unsigned n_bios = 0;
1352 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001353 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001354
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001355 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1356 /* wait for all pending IO completions, before we start
1357 * zeroing things out. */
1358 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1359 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1360 sector, ds >> 9, GFP_NOIO))
1361 peer_req->flags |= EE_WAS_ERROR;
1362 drbd_endio_write_sec_final(peer_req);
1363 return 0;
1364 }
1365
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001366 /* Discards don't have any payload.
1367 * But the scsi layer still expects a bio_vec it can use internally,
1368 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001369 if (peer_req->flags & EE_IS_TRIM)
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001370 nr_pages = 1;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001371
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001372 /* In most cases, we will only need one bio. But in case the lower
1373 * level restrictions happen to be different at this offset on this
1374 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001375 * request in more than one bio.
1376 *
1377 * Plain bio_alloc is good enough here, this is no DRBD internally
1378 * generated bio, but a bio allocated on behalf of the peer.
1379 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001380next_bio:
1381 bio = bio_alloc(GFP_NOIO, nr_pages);
1382 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001383 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001384 goto fail;
1385 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001386 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001387 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001388 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001389 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001390 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001391 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001392
1393 bio->bi_next = bios;
1394 bios = bio;
1395 ++n_bios;
1396
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001397 if (rw & REQ_DISCARD) {
1398 bio->bi_iter.bi_size = ds;
1399 goto submit;
1400 }
1401
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001402 page_chain_for_each(page) {
1403 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1404 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001405 /* A single page must always be possible!
1406 * But in case it fails anyways,
1407 * we deal with it, and complain (below). */
1408 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001409 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001410 "bio_add_page failed for len=%u, "
1411 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001412 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001413 err = -ENOSPC;
1414 goto fail;
1415 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001416 goto next_bio;
1417 }
1418 ds -= len;
1419 sector += len >> 9;
1420 --nr_pages;
1421 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001422 D_ASSERT(device, ds == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001423submit:
1424 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001425
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001426 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001427 do {
1428 bio = bios;
1429 bios = bios->bi_next;
1430 bio->bi_next = NULL;
1431
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001432 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001433 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001434 return 0;
1435
1436fail:
1437 while (bios) {
1438 bio = bios;
1439 bios = bios->bi_next;
1440 bio_put(bio);
1441 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001442 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001443}
1444
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001445static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001446 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001447{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001448 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001449
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001450 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001451 drbd_clear_interval(i);
1452
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001453 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001454 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001455 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001456}
1457
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001458static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001459{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001460 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001461 int vnr;
1462
1463 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001464 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1465 struct drbd_device *device = peer_device->device;
1466
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001467 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001468 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001469 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001470 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001471 rcu_read_lock();
1472 }
1473 rcu_read_unlock();
1474}
1475
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001476static struct drbd_peer_device *
1477conn_peer_device(struct drbd_connection *connection, int volume_number)
1478{
1479 return idr_find(&connection->peer_devices, volume_number);
1480}
1481
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001482static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001484 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001485 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001486 struct drbd_epoch *epoch;
1487
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001488 /* FIXME these are unacked on connection,
1489 * not a specific (peer)device.
1490 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001491 connection->current_epoch->barrier_nr = p->barrier;
1492 connection->current_epoch->connection = connection;
1493 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494
1495 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1496 * the activity log, which means it would not be resynced in case the
1497 * R_PRIMARY crashes now.
1498 * Therefore we must send the barrier_ack after the barrier request was
1499 * completed. */
Philipp Reisnere9526582013-11-22 15:53:41 +01001500 switch (connection->resource->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001501 case WO_none:
1502 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001503 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001504
1505 /* receiver context, in the writeout path of the other node.
1506 * avoid potential distributed deadlock */
1507 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1508 if (epoch)
1509 break;
1510 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001511 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001512 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513
1514 case WO_bdev_flush:
1515 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001516 conn_wait_active_ee_empty(connection);
1517 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001518
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001519 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001520 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1521 if (epoch)
1522 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001523 }
1524
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001525 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001526 default:
Philipp Reisnere9526582013-11-22 15:53:41 +01001527 drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1528 connection->resource->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001529 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001530 }
1531
1532 epoch->flags = 0;
1533 atomic_set(&epoch->epoch_size, 0);
1534 atomic_set(&epoch->active, 0);
1535
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001536 spin_lock(&connection->epoch_lock);
1537 if (atomic_read(&connection->current_epoch->epoch_size)) {
1538 list_add(&epoch->list, &connection->current_epoch->list);
1539 connection->current_epoch = epoch;
1540 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001541 } else {
1542 /* The current_epoch got recycled while we allocated this one... */
1543 kfree(epoch);
1544 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001545 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001546
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001547 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001548}
1549
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data.
 *
 * Receives one data block (or a trim request) from the peer into a newly
 * allocated peer request: reads the optional integrity digest, validates
 * alignment and bounds, copies the payload into the page chain, and
 * verifies the digest.  Returns the peer request on success, NULL on any
 * receive, allocation, validation or digest failure. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	/* Trim packets carry no payload and therefore no digest. */
	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		/* For trim, the affected size is carried in the packet body. */
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* Trim carries no payload: nothing more to receive. */
	if (trim)
		return peer_req;

	/* Copy the payload from the socket into the page chain. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* Verify the received payload against the digest, if one was sent. */
	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
1641
1642/* drbd_drain_block() just takes a data block
1643 * out of the socket input buffer, and discards it.
1644 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001645static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001646{
1647 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001648 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001649 void *data;
1650
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001651 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001652 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001653
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001654 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001655
1656 data = kmap(page);
1657 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001658 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1659
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001660 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001661 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001663 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664 }
1665 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001666 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001667 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668}
1669
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001670static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001671 sector_t sector, int data_size)
1672{
Kent Overstreet79886132013-11-23 17:19:00 -08001673 struct bio_vec bvec;
1674 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001676 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001677 void *dig_in = peer_device->connection->int_dig_in;
1678 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001680 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001681 if (peer_device->connection->peer_integrity_tfm) {
1682 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1683 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001684 if (err)
1685 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001686 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001687 }
1688
Philipp Reisnerb411b362009-09-25 16:07:19 -07001689 /* optimistically update recv_cnt. if receiving fails below,
1690 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001691 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692
1693 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001694 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001695
Kent Overstreet79886132013-11-23 17:19:00 -08001696 bio_for_each_segment(bvec, bio, iter) {
1697 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1698 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001699 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001700 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001701 if (err)
1702 return err;
1703 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001704 }
1705
1706 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001707 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001708 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001709 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001710 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001711 }
1712 }
1713
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001714 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001715 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001716}
1717
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001718/*
1719 * e_end_resync_block() is called in asender context via
1720 * drbd_finish_peer_reqs().
1721 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001722static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001723{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001724 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001725 container_of(w, struct drbd_peer_request, w);
1726 struct drbd_peer_device *peer_device = peer_req->peer_device;
1727 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001728 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001729 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001730
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001731 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001732
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001733 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001734 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001735 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001736 } else {
1737 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001738 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001739
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001740 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001741 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001742 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001743
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001744 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001745}
1746
/*
 * Read a resync data block from the wire and submit it to the local disk.
 *
 * On success the peer request is queued on sync_ee and ownership passes to
 * the endio path (drbd_peer_request_endio / e_end_resync_block), which also
 * does the corresponding put_ldev().  On any failure we clean up here,
 * release the local-disk reference (__releases(local)), and return -EIO so
 * the caller tears down the connection.
 */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	/* We asked for this block (P_RS_DATA_REQUEST); it has now arrived. */
	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	/* Queue on sync_ee before submitting, so the completion path can
	 * always find the request on the list. */
	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* Account incoming resync sectors (size is bytes, >> 9 = sectors). */
	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1784
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001785static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001786find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001787 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788{
1789 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001790
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001791 /* Request object according to our peer */
1792 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001793 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001794 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001795 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001796 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001797 (unsigned long)id, (unsigned long long)sector);
1798 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001799 return NULL;
1800}
1801
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001802static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001804 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001805 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001806 struct drbd_request *req;
1807 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001808 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001809 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001810
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001811 peer_device = conn_peer_device(connection, pi->vnr);
1812 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001813 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001814 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001815
1816 sector = be64_to_cpu(p->sector);
1817
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001818 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001819 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001820 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001821 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001822 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001823
Bart Van Assche24c48302011-05-21 18:32:29 +02001824 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825 * special casing it there for the various failure cases.
1826 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001827 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001828 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001829 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001830 /* else: nothing. handled from drbd_disconnect...
1831 * I don't think we may complete this just yet
1832 * in case we are "on-disconnect: freeze" */
1833
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001834 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001835}
1836
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001837static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001838{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001839 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001840 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001841 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001842 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001843 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001844
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001845 peer_device = conn_peer_device(connection, pi->vnr);
1846 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001847 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001848 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001849
1850 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001851 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001852
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001853 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001854 /* data is submitted to disk within recv_resync_read.
1855 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001856 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001857 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001858 } else {
1859 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001860 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001861
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001862 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001863
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001864 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865 }
1866
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001867 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001868
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001869 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001870}
1871
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001872static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001873 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001874{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001875 struct drbd_interval *i;
1876 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001877
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001878 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001879 if (!i->local)
1880 continue;
1881 req = container_of(i, struct drbd_request, i);
1882 if (req->rq_state & RQ_LOCAL_PENDING ||
1883 !(req->rq_state & RQ_POSTPONED))
1884 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001885 /* as it is RQ_POSTPONED, this will cause it to
1886 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001887 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001888 }
1889}
1890
/*
 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
 *
 * Completion callback for a replicated (peer) write: send the appropriate
 * ack, remove the request from the conflict-detection interval tree, and
 * drop our reference on the write epoch.
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* While we are a resync target, a successful write may
			 * also mark the area in sync (P_RS_WRITE_ACK). */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		/* Pairs with the inc_unacked() done when the write arrived. */
		dec_unacked(device);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right.  */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	/* EV_CLEANUP on cancel: epoch bookkeeping only, no barrier ack. */
	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1935
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001936static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001937{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001938 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001939 container_of(w, struct drbd_peer_request, w);
1940 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001941 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001942
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001943 err = drbd_send_ack(peer_device, ack, peer_req);
1944 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001945
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001946 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001947}
1948
/* Work callback: ack a conflicting peer write as superseded (discarded). */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001953
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001954static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001955{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001956 struct drbd_peer_request *peer_req =
1957 container_of(w, struct drbd_peer_request, w);
1958 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001959
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001960 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001961 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001962}
1963
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001964static bool seq_greater(u32 a, u32 b)
1965{
1966 /*
1967 * We assume 32-bit wrap-around here.
1968 * For 24-bit wrap-around, we would have to shift:
1969 * a <<= 8; b <<= 8;
1970 */
1971 return (s32)a - (s32)b > 0;
1972}
1973
1974static u32 seq_max(u32 a, u32 b)
1975{
1976 return seq_greater(a, b) ? a : b;
1977}
1978
/*
 * Record the highest sequence number seen from the peer.  Only relevant
 * when we are responsible for resolving write conflicts (RESOLVE_CONFLICTS);
 * otherwise peer sequence numbers are ignored entirely.
 */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
1994
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001995static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1996{
1997 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1998}
1999
2000/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002001static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002002{
2003 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002004 bool rv = 0;
2005
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002006 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002007 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02002008 if (overlaps(peer_req->i.sector, peer_req->i.size,
2009 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002010 rv = 1;
2011 break;
2012 }
2013 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002014 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002015
2016 return rv;
2017}
2018
Philipp Reisnerb411b362009-09-25 16:07:19 -07002019/* Called from receive_Data.
2020 * Synchronize packets on sock with packets on msock.
2021 *
2022 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
2023 * packet traveling on msock, they are still processed in the order they have
2024 * been sent.
2025 *
2026 * Note: we don't care for Ack packets overtaking P_DATA packets.
2027 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002028 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002029 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002030 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002031 * ourselves. Correctly handles 32bit wrap around.
2032 *
2033 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2034 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2035 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2036 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2037 *
2038 * returns 0 if we may process the packet,
2039 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002040static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002041{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002042 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002043 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002044 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002045 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002046
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002047 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002048 return 0;
2049
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002050 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002051 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002052 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2053 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002054 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002055 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002056
Philipp Reisnerb411b362009-09-25 16:07:19 -07002057 if (signal_pending(current)) {
2058 ret = -ERESTARTSYS;
2059 break;
2060 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002061
2062 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002063 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002064 rcu_read_unlock();
2065
2066 if (!tp)
2067 break;
2068
2069 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002070 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2071 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002072 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002073 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002074 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002075 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002076 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002077 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002079 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002080 break;
2081 }
2082 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002083 spin_unlock(&device->peer_seq_lock);
2084 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002085 return ret;
2086}
2087
Lars Ellenberg688593c2010-11-17 22:25:03 +01002088/* see also bio_flags_to_wire()
2089 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2090 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002091static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002092{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002093 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2094 (dpf & DP_FUA ? REQ_FUA : 0) |
2095 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2096 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002097}
2098
/*
 * Fail all postponed local writes that overlap [sector, sector+size).
 * Called with req_lock held; the lock is dropped around
 * complete_master_bio() and the scan is restarted from the top each time,
 * because the tree may have changed while the lock was released.
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;	/* peer request, skip */
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* complete_master_bio() must not run under req_lock. */
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
2123
/*
 * Detect and resolve conflicts between the incoming peer write @peer_req
 * and overlapping local (or remote) requests.  Called with req_lock held.
 *
 * Returns 0 when the peer request may be submitted, -ENOENT when it has
 * been queued for a superseded/retry ack instead, or another negative
 * error when waiting for a conflicting request failed.  On error the
 * interval inserted below is removed again.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	/* Are we the node responsible for deciding conflict outcomes? */
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;	/* our own interval, not a conflict */

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			/* drbd_wait_misc() dropped the lock; rescan. */
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* Queue the verdict ack; dec_unacked() happens in
			 * e_send_ack() via the chosen callback. */
			inc_unacked(device);
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			wake_asender(connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
2230
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002232static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002233{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002234 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002235 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002236 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002237 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002238 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002239 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002240 int rw = WRITE;
2241 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002242 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002243
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002244 peer_device = conn_peer_device(connection, pi->vnr);
2245 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002246 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002247 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002249 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002250 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002251
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002252 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2253 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002254 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002255 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002256 if (!err)
2257 err = err2;
2258 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002259 }
2260
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002261 /*
2262 * Corresponding put_ldev done either below (on various errors), or in
2263 * drbd_peer_request_endio, if we successfully submit the data at the
2264 * end of this function.
2265 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002266
2267 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002268 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002269 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002270 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002271 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272 }
2273
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002274 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002275
Lars Ellenberg688593c2010-11-17 22:25:03 +01002276 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002277 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002278 if (pi->cmd == P_TRIM) {
2279 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2280 peer_req->flags |= EE_IS_TRIM;
2281 if (!blk_queue_discard(q))
2282 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2283 D_ASSERT(peer_device, peer_req->i.size > 0);
2284 D_ASSERT(peer_device, rw & REQ_DISCARD);
2285 D_ASSERT(peer_device, peer_req->pages == NULL);
2286 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002287 D_ASSERT(device, peer_req->i.size == 0);
2288 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002289 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002290
2291 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002292 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002293
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002294 spin_lock(&connection->epoch_lock);
2295 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002296 atomic_inc(&peer_req->epoch->epoch_size);
2297 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002298 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002299
Philipp Reisner302bdea2011-04-21 11:36:49 +02002300 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002301 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002302 rcu_read_unlock();
2303 if (tp) {
2304 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002305 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002306 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002307 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002308 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002309 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002310 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002311 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002312 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002313 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002314 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002315 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002316 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002317 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002318 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002319 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002320 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002321 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002322 /* if we use the zeroout fallback code, we process synchronously
2323 * and we wait for all pending requests, respectively wait for
2324 * active_ee to become empty in drbd_submit_peer_request();
2325 * better not add ourselves here. */
2326 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2327 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002328 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002330 if (device->state.conn == C_SYNC_TARGET)
2331 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002332
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002333 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002334 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002335 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002336 case DRBD_PROT_C:
2337 dp_flags |= DP_SEND_WRITE_ACK;
2338 break;
2339 case DRBD_PROT_B:
2340 dp_flags |= DP_SEND_RECEIVE_ACK;
2341 break;
2342 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002343 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002344 }
2345
2346 if (dp_flags & DP_SEND_WRITE_ACK) {
2347 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002348 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002349 /* corresponding dec_unacked() in e_end_block()
2350 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002351 }
2352
2353 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002354 /* I really don't like it that the receiver thread
2355 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002356 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002357 }
2358
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002359 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002360 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002361 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002362 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2363 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Lars Ellenberg4dd726f2014-02-11 11:15:36 +01002364 drbd_al_begin_io(device, &peer_req->i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002365 }
2366
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002367 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002368 if (!err)
2369 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002370
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002371 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002372 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002373 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002374 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002375 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002376 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002377 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002378 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002379
Philipp Reisnerb411b362009-09-25 16:07:19 -07002380out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002381 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002382 put_ldev(device);
2383 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002384 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002385}
2386
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002387/* We may throttle resync, if the lower device seems to be busy,
2388 * and current sync rate is above c_min_rate.
2389 *
2390 * To decide whether or not the lower device is busy, we use a scheme similar
2391 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2392 * (more than 64 sectors) of activity we cannot account for with our own resync
2393 * activity, it obviously is "busy".
2394 *
2395 * The current sync rate used here uses only the most recent two step marks,
2396 * to have a short time average so we can react faster.
2397 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002398bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2399{
2400 struct lc_element *tmp;
2401 bool throttle = true;
2402
2403 if (!drbd_rs_c_min_rate_throttle(device))
2404 return false;
2405
2406 spin_lock_irq(&device->al_lock);
2407 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2408 if (tmp) {
2409 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2410 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2411 throttle = false;
2412 /* Do not slow down if app IO is already waiting for this extent */
2413 }
2414 spin_unlock_irq(&device->al_lock);
2415
2416 return throttle;
2417}
2418
2419bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002420{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002421 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002422 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002423 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002424 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002425
2426 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002427 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002428 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002429
2430 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002431 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002432 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002433
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002434 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2435 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002436 atomic_read(&device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002437 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002438 unsigned long rs_left;
2439 int i;
2440
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002441 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002442
2443 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2444 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002445 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002446
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002447 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2448 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002449 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002450 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002451
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002452 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002453 if (!dt)
2454 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002455 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002456 dbdt = Bit2KB(db/dt);
2457
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002458 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002459 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002460 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002461 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002462}
2463
/*
 * receive_DataRequest() - handle a peer read request.
 *
 * Serves P_DATA_REQUEST (application read from a diskless peer) as well as
 * the resync/verify family: P_RS_DATA_REQUEST, P_CSUM_RS_REQUEST,
 * P_OV_REQUEST and P_OV_REPLY.  Validates the requested range, allocates a
 * peer request with the matching completion callback, optionally receives a
 * digest payload, applies resync throttling, and submits the local read.
 *
 * Returns 0 on success, a negative error code otherwise; a non-zero return
 * makes the caller drop the connection.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Reject malformed sizes (non-positive, not 512-byte aligned, or
	 * larger than a single DRBD bio may be). */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* Reject requests that reach beyond the end of our device. */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		/* No up-to-date local data: send the appropriate negative
		 * ack for the request type, then drain the payload. */
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* Select the completion callback and fault-injection type per
	 * request kind; some cases jump directly to the submit labels. */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* Both carry a digest as payload; receive it into a
		 * digest_info attached to the peer request. */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
			/* remember to report stats in drbd_resync_finished */
			device->use_csums = true;
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First online-verify request of a run: initialize the
		 * verify position and progress marks. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	inc_unacked(device);
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2656
/**
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy.  Returns -1 to discard the
 * local data, 1 to discard the remote data, and leaves the initial -100
 * ("no automatic decision") for policies that cannot decide (disconnect,
 * configuration error, or an undecided consensus case).
 *
 * Several cases below intentionally fall through to the next strategy when
 * the preferred one yields no decision.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* Low bit of the bitmap UUID records who was primary last. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* Amount of changed data on each side. */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* These policies are only meaningful with remaining
		 * primaries; they must not be configured for 0p. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* Both unchanged: break the tie via the
			 * RESOLVE_CONFLICTS flag. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* Only the pure zero-changes policy stops here; the
		 * younger/older fallthrough continues below. */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* fall through */
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2737
/**
 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
 *
 * Applies the configured after-sb-1pri policy, mostly by delegating the
 * actual data comparison to drbd_asb_recover_0p() and then filtering the
 * result against our own role.  Return convention matches recover_0p:
 * -1 discard local, 1 discard remote, -100 no automatic decision.
 */
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_1p;

	rcu_read_lock();
	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
	rcu_read_unlock();
	switch (after_sb_1p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_DISCARD_ZERO_CHG:
		/* These are 0p policies; invalid for the 1p setting. */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		/* Accept the 0p verdict only when it agrees with our role:
		 * a secondary may lose, a primary may win. */
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1 && device->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCARD_SECONDARY:
		return device->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_PRIMARY) {
			/* We would lose but are primary: try to step down,
			 * and call the pri-lost-after-sb helper if we
			 * cannot. */
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}
2794
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002795/**
2796 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2797 */
2798static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002799{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002800 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002801 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002802 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002803
Philipp Reisner44ed1672011-04-19 17:10:19 +02002804 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002805 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002806 rcu_read_unlock();
2807 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002808 case ASB_DISCARD_YOUNGER_PRI:
2809 case ASB_DISCARD_OLDER_PRI:
2810 case ASB_DISCARD_LEAST_CHG:
2811 case ASB_DISCARD_LOCAL:
2812 case ASB_DISCARD_REMOTE:
2813 case ASB_CONSENSUS:
2814 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002815 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002816 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002817 break;
2818 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002819 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820 break;
2821 case ASB_DISCONNECT:
2822 break;
2823 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002824 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002825 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002826 enum drbd_state_rv rv2;
2827
Philipp Reisnerb411b362009-09-25 16:07:19 -07002828 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2829 * we might be here in C_WF_REPORT_PARAMS which is transient.
2830 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002831 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002832 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002833 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002834 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002835 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836 rv = hg;
2837 }
2838 } else
2839 rv = hg;
2840 }
2841
2842 return rv;
2843}
2844
/* Log one UUID set (current, bitmap, two history slots) together with the
 * out-of-sync bit count and the UUID flags, prefixed with @text ("self" or
 * "peer" at the call sites in this file).
 * @uuid may be NULL — presumably the peer's UUID array can be torn down
 * concurrently (the log text suggests so); we only log that instead of
 * dereferencing it. */
static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
			   u64 bits, u64 flags)
{
	if (!uuid) {
		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
		return;
	}
	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
		  text,
		  (unsigned long long)uuid[UI_CURRENT],
		  (unsigned long long)uuid[UI_BITMAP],
		  (unsigned long long)uuid[UI_HISTORY_START],
		  (unsigned long long)uuid[UI_HISTORY_END],
		  (unsigned long long)bits,
		  (unsigned long long)flags);
}
2861
/*
  Return value convention of drbd_uuid_compare():

   100	after split brain try auto recover
     2	C_SYNC_SOURCE set BitMap
     1	C_SYNC_SOURCE use BitMap
     0	no Sync
    -1	C_SYNC_TARGET use BitMap
    -2	C_SYNC_TARGET set BitMap
  -100	after split brain, disconnect
 -1000	unrelated data
 -1091	requires proto 91
 -1096	requires proto 96

  The rules are evaluated strictly in order; *rule_nr reports which rule
  decided, for the log line in drbd_sync_handshake().  As a side effect,
  rules 34/35 and 51/71 repair UUID state after a lost P_SYNC_UUID packet.
 */
static int drbd_uuid_compare(struct drbd_device *const device, int *rule_nr) __must_hold(local)
{
	struct drbd_peer_device *const peer_device = first_peer_device(device);
	/* NOTE(review): connection is NULL when there is no peer_device, yet it
	 * is dereferenced unconditionally below — callers presumably guarantee
	 * a peer device exists here; confirm before relying on the NULL arm. */
	struct drbd_connection *const connection = peer_device ? peer_device->connection : NULL;
	u64 self, peer;
	int i, j;

	/* Compare with the lowest bit masked off; that bit is maintained
	 * separately (see the UI_FLAGS & 1 handling in drbd_sync_handshake). */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both sides freshly created — nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are fresh — full sync from peer. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is fresh — full sync to peer. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* Peer cleared its bitmap UUID but we still have ours:
		 * we were sync source and missed the "resync finished" event. */
		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				/* Retire our bitmap UUID into history to match the peer's view. */
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* Mirror case: we cleared our bitmap UUID, peer still has one:
		 * we were sync target; correct the peer's in-memory view. */
		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary: tie-break via RESOLVE_CONFLICTS. */
			dc = test_bit(RESOLVE_CONFLICTS, &connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current UUID is the peer's bitmap UUID — peer is
	 * sync source toward us. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* Rule 51: our current UUID is in the peer's history — the peer
	 * started a resync whose P_SYNC_UUID packet we never received;
	 * roll the peer's UUIDs back and become sync target. */
	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current UUID appears anywhere in the peer's history —
	 * our data is an ancestor of theirs; full sync from peer. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rule 70: our bitmap UUID is the peer's current — we are sync source. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* Rule 71: mirror of rule 51 — we lost our own P_SYNC_UUID update;
	 * note that 'peer' still holds the peer's current UUID from rule 70. */
	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get through. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* Rule 80: the peer's current UUID is in our history — full sync to peer. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: identical non-zero bitmap UUIDs — classic split brain,
	 * try auto recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: any common ancestor in the histories — split brain,
	 * but no auto recovery possible; disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the two UUID sets at all. */
	return -1000;
}
3057
3058/* drbd_sync_handshake() returns the new conn state on success, or
3059 CONN_MASK (-1) on failure.
3060 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003061static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3062 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003063 enum drbd_disk_state peer_disk) __must_hold(local)
3064{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003065 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003066 enum drbd_conns rv = C_MASK;
3067 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003068 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003069 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003070
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003071 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003073 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003074
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003075 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003076
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003077 spin_lock_irq(&device->ldev->md.uuid_lock);
3078 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3079 drbd_uuid_dump(device, "peer", device->p_uuid,
3080 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003082 hg = drbd_uuid_compare(device, &rule_nr);
3083 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003084
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003085 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086
3087 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003088 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 return C_MASK;
3090 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003091 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003092 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003093 return C_MASK;
3094 }
3095
3096 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3097 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3098 int f = (hg == -100) || abs(hg) == 2;
3099 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3100 if (f)
3101 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003102 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003103 hg > 0 ? "source" : "target");
3104 }
3105
Adam Gandelman3a11a482010-04-08 16:48:23 -07003106 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003107 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003108
Philipp Reisner44ed1672011-04-19 17:10:19 +02003109 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003110 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003111
3112 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003113 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003114 + (peer_role == R_PRIMARY);
3115 int forced = (hg == -100);
3116
3117 switch (pcount) {
3118 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003119 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003120 break;
3121 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003122 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003123 break;
3124 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003125 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126 break;
3127 }
3128 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003129 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003130 "automatically solved. Sync from %s node\n",
3131 pcount, (hg < 0) ? "peer" : "this");
3132 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003133 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003134 " UUIDs where ambiguous.\n");
3135 hg = hg*2;
3136 }
3137 }
3138 }
3139
3140 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003141 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003142 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003143 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144 hg = 1;
3145
3146 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003147 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148 "Sync from %s node\n",
3149 (hg < 0) ? "peer" : "this");
3150 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003151 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003152 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003153 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003154
3155 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003156 /* FIXME this log message is not correct if we end up here
3157 * after an attempted attach on a diskless node.
3158 * We just refuse to attach -- well, we drop the "connection"
3159 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003160 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003161 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003162 return C_MASK;
3163 }
3164
3165 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003166 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003167 return C_MASK;
3168 }
3169
3170 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003171 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003172 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003174 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003175 /* fall through */
3176 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003177 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003178 return C_MASK;
3179 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003180 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003181 "assumption\n");
3182 }
3183 }
3184
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003185 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003186 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003187 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003188 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003189 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003190 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3191 abs(hg) >= 2 ? "full" : "bit-map based");
3192 return C_MASK;
3193 }
3194
Philipp Reisnerb411b362009-09-25 16:07:19 -07003195 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003196 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003197 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003198 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003199 return C_MASK;
3200 }
3201
3202 if (hg > 0) { /* become sync source. */
3203 rv = C_WF_BITMAP_S;
3204 } else if (hg < 0) { /* become sync target */
3205 rv = C_WF_BITMAP_T;
3206 } else {
3207 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003208 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003209 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003210 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003211 }
3212 }
3213
3214 return rv;
3215}
3216
Philipp Reisnerf179d762011-05-16 17:31:47 +02003217static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003218{
3219 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003220 if (peer == ASB_DISCARD_REMOTE)
3221 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003222
3223 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003224 if (peer == ASB_DISCARD_LOCAL)
3225 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226
3227 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003228 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003229}
3230
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003231static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003232{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003233 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003234 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3235 int p_proto, p_discard_my_data, p_two_primaries, cf;
3236 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3237 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003238 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003239 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003240
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 p_proto = be32_to_cpu(p->protocol);
3242 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3243 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3244 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003245 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003246 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003247 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003248
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003249 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003250 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003251
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003252 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003253 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003254 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003255 if (err)
3256 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003257 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003258 }
3259
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003260 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003261 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003262
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003263 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003264 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003265
3266 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003267 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003268
3269 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003270 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003271 goto disconnect_rcu_unlock;
3272 }
3273
3274 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003275 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003276 goto disconnect_rcu_unlock;
3277 }
3278
3279 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003280 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003281 goto disconnect_rcu_unlock;
3282 }
3283
3284 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003285 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003286 goto disconnect_rcu_unlock;
3287 }
3288
3289 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003290 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003291 goto disconnect_rcu_unlock;
3292 }
3293
3294 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003295 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003296 goto disconnect_rcu_unlock;
3297 }
3298
3299 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003300 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003301 goto disconnect_rcu_unlock;
3302 }
3303
3304 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003305 }
3306
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003307 if (integrity_alg[0]) {
3308 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003310 /*
3311 * We can only change the peer data integrity algorithm
3312 * here. Changing our own data integrity algorithm
3313 * requires that we send a P_PROTOCOL_UPDATE packet at
3314 * the same time; otherwise, the peer has no way to
3315 * tell between which packets the algorithm should
3316 * change.
3317 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003318
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003319 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3320 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003321 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003322 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003323 goto disconnect;
3324 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003325
3326 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3327 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3328 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3329 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003330 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003331 goto disconnect;
3332 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003333 }
3334
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003335 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3336 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003337 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003338 goto disconnect;
3339 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003340
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003341 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003342 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003343 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003344 *new_net_conf = *old_net_conf;
3345
3346 new_net_conf->wire_protocol = p_proto;
3347 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3348 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3349 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3350 new_net_conf->two_primaries = p_two_primaries;
3351
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003352 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003353 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003354 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003355
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003356 crypto_free_hash(connection->peer_integrity_tfm);
3357 kfree(connection->int_dig_in);
3358 kfree(connection->int_dig_vv);
3359 connection->peer_integrity_tfm = peer_integrity_tfm;
3360 connection->int_dig_in = int_dig_in;
3361 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003362
3363 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003364 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003365 integrity_alg[0] ? integrity_alg : "(none)");
3366
3367 synchronize_rcu();
3368 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003369 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003370
Philipp Reisner44ed1672011-04-19 17:10:19 +02003371disconnect_rcu_unlock:
3372 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003373disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003374 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003375 kfree(int_dig_in);
3376 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003377 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003378 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003379}
3380
3381/* helper function
3382 * input: alg name, feature name
3383 * return: NULL (alg name was "")
3384 * ERR_PTR(error) if something goes wrong
3385 * or the crypto hash ptr, if it worked out ok. */
Lars Ellenberg8ce953a2014-02-27 09:46:18 +01003386static struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387 const char *alg, const char *name)
3388{
3389 struct crypto_hash *tfm;
3390
3391 if (!alg[0])
3392 return NULL;
3393
3394 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3395 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003396 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003397 alg, name, PTR_ERR(tfm));
3398 return tfm;
3399 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003400 return tfm;
3401}
3402
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003403static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003404{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003405 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003406 int size = pi->size;
3407
3408 while (size) {
3409 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003410 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003411 if (s <= 0) {
3412 if (s < 0)
3413 return s;
3414 break;
3415 }
3416 size -= s;
3417 }
3418 if (size)
3419 return -EIO;
3420 return 0;
3421}
3422
3423/*
3424 * config_unknown_volume - device configuration command for unknown volume
3425 *
3426 * When a device is added to an existing connection, the node on which the
3427 * device is added first will send configuration commands to its peer but the
3428 * peer will not know about the device yet. It will warn and ignore these
3429 * commands. Once the device is added on the second node, the second node will
3430 * send the same device configuration commands, but in the other direction.
3431 *
3432 * (We can also end up here if drbd is misconfigured.)
3433 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003434static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003435{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003436 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003437 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003438 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003439}
3440
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003441static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003442{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003443 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003444 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003445 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003446 unsigned int header_size, data_size, exp_max_sz;
3447 struct crypto_hash *verify_tfm = NULL;
3448 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003449 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003450 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003451 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003452 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003453 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003454 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003455
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003456 peer_device = conn_peer_device(connection, pi->vnr);
3457 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003458 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003459 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003460
3461 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3462 : apv == 88 ? sizeof(struct p_rs_param)
3463 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003464 : apv <= 94 ? sizeof(struct p_rs_param_89)
3465 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003466
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003467 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003468 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003469 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003470 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003471 }
3472
3473 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003474 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003475 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003476 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003477 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003478 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003479 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003480 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003481 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003482 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003483 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003484 }
3485
3486 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003487 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003488 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3489
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003490 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003491 if (err)
3492 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003493
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003494 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003495 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003496 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003497 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3498 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003499 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003500 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003501 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003502 return -ENOMEM;
3503 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003504
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003505 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003506 *new_disk_conf = *old_disk_conf;
3507
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003508 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003509 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003510
3511 if (apv >= 88) {
3512 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003513 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003514 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003515 "peer wants %u, accepting only up to %u byte\n",
3516 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003517 err = -EIO;
3518 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003519 }
3520
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003521 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003522 if (err)
3523 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003524 /* we expect NUL terminated string */
3525 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003526 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003527 p->verify_alg[data_size-1] = 0;
3528
3529 } else /* apv >= 89 */ {
3530 /* we still expect NUL terminated strings */
3531 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003532 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3533 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003534 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3535 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3536 }
3537
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003538 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003539 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003540 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003541 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003542 goto disconnect;
3543 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003544 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003545 p->verify_alg, "verify-alg");
3546 if (IS_ERR(verify_tfm)) {
3547 verify_tfm = NULL;
3548 goto disconnect;
3549 }
3550 }
3551
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003552 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003553 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003554 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003555 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003556 goto disconnect;
3557 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003558 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003559 p->csums_alg, "csums-alg");
3560 if (IS_ERR(csums_tfm)) {
3561 csums_tfm = NULL;
3562 goto disconnect;
3563 }
3564 }
3565
Philipp Reisner813472c2011-05-03 16:47:02 +02003566 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003567 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3568 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3569 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3570 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003571
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003572 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003573 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003574 new_plan = fifo_alloc(fifo_size);
3575 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003576 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003577 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003578 goto disconnect;
3579 }
3580 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003581 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003582
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003583 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003584 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3585 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003586 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003587 goto disconnect;
3588 }
3589
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003590 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003591
3592 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003593 strcpy(new_net_conf->verify_alg, p->verify_alg);
3594 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003595 crypto_free_hash(peer_device->connection->verify_tfm);
3596 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003597 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003598 }
3599 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003600 strcpy(new_net_conf->csums_alg, p->csums_alg);
3601 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003602 crypto_free_hash(peer_device->connection->csums_tfm);
3603 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003604 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003605 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003606 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003607 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003608 }
3609
Philipp Reisner813472c2011-05-03 16:47:02 +02003610 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003611 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3612 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003613 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003614
3615 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003616 old_plan = device->rs_plan_s;
3617 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003618 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003619
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003620 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003621 synchronize_rcu();
3622 if (new_net_conf)
3623 kfree(old_net_conf);
3624 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003625 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003626
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003627 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003628
Philipp Reisner813472c2011-05-03 16:47:02 +02003629reconnect:
3630 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003631 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003632 kfree(new_disk_conf);
3633 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003634 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003635 return -EIO;
3636
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003638 kfree(new_plan);
3639 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003640 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003641 kfree(new_disk_conf);
3642 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003643 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644 /* just for completeness: actually not needed,
3645 * as this is not reached if csums_tfm was ok. */
3646 crypto_free_hash(csums_tfm);
3647 /* but free the verify_tfm again, if csums_tfm did not work out */
3648 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003649 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003650 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651}
3652
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003654static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003655 const char *s, sector_t a, sector_t b)
3656{
3657 sector_t d;
3658 if (a == 0 || b == 0)
3659 return;
3660 d = (a > b) ? (a - b) : (b - a);
3661 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003662 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003663 (unsigned long long)a, (unsigned long long)b);
3664}
3665
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003666static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003668 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003669 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003670 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003671 enum determine_dev_size dd = DS_UNCHANGED;
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003672 sector_t p_size, p_usize, p_csize, my_usize;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003674 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003675
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003676 peer_device = conn_peer_device(connection, pi->vnr);
3677 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003678 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003679 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003680
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681 p_size = be64_to_cpu(p->d_size);
3682 p_usize = be64_to_cpu(p->u_size);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003683 p_csize = be64_to_cpu(p->c_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684
Philipp Reisnerb411b362009-09-25 16:07:19 -07003685 /* just store the peer's disk size for now.
3686 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003687 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003688
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003689 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003690 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003691 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003692 rcu_read_unlock();
3693
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003694 warn_if_differ_considerably(device, "lower level device sizes",
3695 p_size, drbd_get_max_capacity(device->ldev));
3696 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003697 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003698
3699 /* if this is the first connect, or an otherwise expected
3700 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003701 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003702 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703
3704 /* Never shrink a device with usable data during connect.
3705 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003706 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3707 drbd_get_capacity(device->this_bdev) &&
3708 device->state.disk >= D_OUTDATED &&
3709 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003710 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003711 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003712 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003713 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003714 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003715
3716 if (my_usize != p_usize) {
3717 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3718
3719 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3720 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003721 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003722 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003723 return -ENOMEM;
3724 }
3725
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003726 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003727 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003728 *new_disk_conf = *old_disk_conf;
3729 new_disk_conf->disk_size = p_usize;
3730
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003731 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003732 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003733 synchronize_rcu();
3734 kfree(old_disk_conf);
3735
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003736 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003737 (unsigned long)my_usize);
3738 }
3739
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003740 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003742
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003743 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003744 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3745 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3746 drbd_reconsider_max_bio_size(), we can be sure that after
3747 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3748
Philipp Reisnere89b5912010-03-24 17:11:33 +01003749 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003750 if (get_ldev(device)) {
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003751 drbd_reconsider_max_bio_size(device, device->ldev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003752 dd = drbd_determine_dev_size(device, ddsf, NULL);
3753 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003754 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003755 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003756 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757 } else {
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003758 /*
3759 * I am diskless, need to accept the peer's *current* size.
3760 * I must NOT accept the peers backing disk size,
3761 * it may have been larger than mine all along...
3762 *
3763 * At this point, the peer knows more about my disk, or at
3764 * least about what we last agreed upon, than myself.
3765 * So if his c_size is less than his d_size, the most likely
3766 * reason is that *my* d_size was smaller last time we checked.
3767 *
3768 * However, if he sends a zero current size,
3769 * take his (user-capped or) backing disk size anyways.
3770 */
Philipp Reisner8fe39aa2013-11-22 13:22:13 +01003771 drbd_reconsider_max_bio_size(device, NULL);
Lars Ellenberg6a8d68b2014-03-18 12:22:14 +01003772 drbd_set_my_capacity(device, p_csize ?: p_usize ?: p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773 }
3774
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003775 if (get_ldev(device)) {
3776 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3777 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003778 ldsc = 1;
3779 }
3780
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003781 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003782 }
3783
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003784 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003785 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003786 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003787 /* we have different sizes, probably peer
3788 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003789 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003790 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003791 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3792 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3793 if (device->state.pdsk >= D_INCONSISTENT &&
3794 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003795 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003796 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003797 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003798 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003799 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003800 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003801 }
3802 }
3803
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003804 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003805}
3806
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003807static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003808{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003809 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003810 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003811 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003812 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003813 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003815 peer_device = conn_peer_device(connection, pi->vnr);
3816 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003817 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003818 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003819
Philipp Reisnerb411b362009-09-25 16:07:19 -07003820 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003821 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003822 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003823 return false;
3824 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825
3826 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3827 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3828
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003829 kfree(device->p_uuid);
3830 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003831
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003832 if (device->state.conn < C_CONNECTED &&
3833 device->state.disk < D_INCONSISTENT &&
3834 device->state.role == R_PRIMARY &&
3835 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003836 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003837 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003838 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003839 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003840 }
3841
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003842 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003843 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003844 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003845 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003846 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003847 (p_uuid[UI_FLAGS] & 8);
3848 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003849 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003850 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003851 "clear_n_write from receive_uuids",
3852 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003853 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3854 _drbd_uuid_set(device, UI_BITMAP, 0);
3855 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003856 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003857 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003858 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003859 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003860 put_ldev(device);
3861 } else if (device->state.disk < D_INCONSISTENT &&
3862 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003863 /* I am a diskless primary, the peer just created a new current UUID
3864 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003865 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003866 }
3867
3868 /* Before we test for the disk state, we should wait until an eventually
3869 ongoing cluster wide state change is finished. That is important if
3870 we are primary and are detaching from our disk. We need to see the
3871 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003872 mutex_lock(device->state_mutex);
3873 mutex_unlock(device->state_mutex);
3874 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3875 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003876
3877 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003878 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003879
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003880 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003881}
3882
3883/**
3884 * convert_state() - Converts the peer's view of the cluster state to our point of view
3885 * @ps: The state as seen by the peer.
3886 */
3887static union drbd_state convert_state(union drbd_state ps)
3888{
3889 union drbd_state ms;
3890
3891 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003892 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003893 [C_CONNECTED] = C_CONNECTED,
3894
3895 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3896 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3897 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3898 [C_VERIFY_S] = C_VERIFY_T,
3899 [C_MASK] = C_MASK,
3900 };
3901
3902 ms.i = ps.i;
3903
3904 ms.conn = c_tab[ps.conn];
3905 ms.peer = ps.role;
3906 ms.role = ps.peer;
3907 ms.pdsk = ps.disk;
3908 ms.disk = ps.pdsk;
3909 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3910
3911 return ms;
3912}
3913
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003914static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003915{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003916 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003917 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003918 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003919 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003920 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003921
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003922 peer_device = conn_peer_device(connection, pi->vnr);
3923 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003924 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003925 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003926
Philipp Reisnerb411b362009-09-25 16:07:19 -07003927 mask.i = be32_to_cpu(p->mask);
3928 val.i = be32_to_cpu(p->val);
3929
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003930 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003931 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003932 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003933 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003934 }
3935
3936 mask = convert_state(mask);
3937 val = convert_state(val);
3938
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003939 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003940 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003941
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003942 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003943
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003944 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003945}
3946
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003947static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003948{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003949 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003950 union drbd_state mask, val;
3951 enum drbd_state_rv rv;
3952
3953 mask.i = be32_to_cpu(p->mask);
3954 val.i = be32_to_cpu(p->val);
3955
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003956 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3957 mutex_is_locked(&connection->cstate_mutex)) {
3958 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003959 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003960 }
3961
3962 mask = convert_state(mask);
3963 val = convert_state(val);
3964
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003965 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3966 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003967
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003968 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003969}
3970
/*
 * receive_state() - process a P_STATE packet from the peer.
 *
 * Adopts the peer's reported state into our cluster state: translates the
 * peer's disk state (a peer still in D_NEGOTIATING is mapped via the
 * "inconsistent" bit of the uuid flags it sent earlier), decides whether a
 * resync handshake is needed, and commits the combined state under the
 * resource's req_lock, retrying if our own state changed concurrently.
 *
 * Returns 0 on success, -ECONNRESET if the connection is already being
 * torn down, and -EIO on fatal disagreement (the connection is then forced
 * towards C_DISCONNECTING / C_PROTOCOL_ERROR).
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* Peer is still attaching; derive its effective disk state
		 * from the "inconsistent" flag bit in the uuids it sent. */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* snapshot our state; re-checked below before committing, and we
	 * jump back here (with the lock re-taken) if it changed meanwhile */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer went Ahead of us: we become the Behind side */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	/* Decide whether a resync is needed; requires the peer's uuids and
	 * a local disk that is at least attaching. */
	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK from drbd_sync_handshake signals "could not agree" */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	/* Commit phase: verify our snapshot is still current, then apply. */
	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4148
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004149static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004150{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004151 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004152 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004153 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004154
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004155 peer_device = conn_peer_device(connection, pi->vnr);
4156 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004157 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004158 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004159
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004160 wait_event(device->misc_wait,
4161 device->state.conn == C_WF_SYNC_UUID ||
4162 device->state.conn == C_BEHIND ||
4163 device->state.conn < C_CONNECTED ||
4164 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004166 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167
Philipp Reisnerb411b362009-09-25 16:07:19 -07004168 /* Here the _drbd_uuid_ functions are right, current should
4169 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004170 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4171 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4172 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004173
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004174 drbd_print_uuids(device, "updated sync uuid");
4175 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004176
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004177 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004178 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004179 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004180
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004181 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004182}
4183
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004184/**
4185 * receive_bitmap_plain
4186 *
4187 * Return 0 when done, 1 when another iteration is needed, and a negative error
4188 * code upon failure.
4189 */
4190static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004191receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004192 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004194 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004195 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004196 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004197 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004198 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004199 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004200
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004201 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004202 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004203 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004204 }
4205 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004206 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004207 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004208 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004209 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004211 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004212
4213 c->word_offset += num_words;
4214 c->bit_offset = c->word_offset * BITS_PER_LONG;
4215 if (c->bit_offset > c->bm_bits)
4216 c->bit_offset = c->bm_bits;
4217
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004218 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004219}
4220
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004221static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4222{
4223 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4224}
4225
4226static int dcbp_get_start(struct p_compressed_bm *p)
4227{
4228 return (p->encoding & 0x80) != 0;
4229}
4230
4231static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4232{
4233 return (p->encoding >> 4) & 0x7;
4234}
4235
/**
 * recv_bm_rle_bits
 *
 * Decode one compressed bitmap payload: a VLI (variable length integer)
 * encoded sequence of run lengths, alternating between runs of clear and
 * set bits, and apply the "set" runs to the local bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* window of up to 64 not-yet-consumed stream bits */
	u64 rl;		/* decoded run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of current run */
	unsigned long e;			/* end bit of current run */
	int toggle = dcbp_get_start(p);	/* whether the first run is of set bits */
	int have;	/* number of valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead window */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* each iteration decodes one run; toggle flips between clear/set */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* the code word must fit into the look-ahead we had */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill the look-ahead window from the stream */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 0 when the whole bitmap has been received, 1 for "more to come" */
	return (s != c->bm_bits);
}
4304
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004305/**
4306 * decode_bitmap_c
4307 *
4308 * Return 0 when done, 1 when another iteration is needed, and a negative error
4309 * code upon failure.
4310 */
4311static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004312decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004314 struct bm_xfer_ctx *c,
4315 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004316{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004317 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004318 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319
4320 /* other variants had been implemented for evaluation,
4321 * but have been dropped as this one turned out to be "best"
4322 * during all our tests. */
4323
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004324 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4325 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004326 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004327}
4328
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004329void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330 const char *direction, struct bm_xfer_ctx *c)
4331{
4332 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004333 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004334 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4335 unsigned int plain =
4336 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4337 c->bm_words * sizeof(unsigned long);
4338 unsigned int total = c->bytes[0] + c->bytes[1];
4339 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340
4341 /* total can not be zero. but just in case: */
4342 if (total == 0)
4343 return;
4344
4345 /* don't report if not compressed */
4346 if (total >= plain)
4347 return;
4348
4349 /* total < plain. check for overflow, still */
4350 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4351 : (1000 * total / plain);
4352
4353 if (r > 1000)
4354 r = 1000;
4355
4356 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004357 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358 "total %u; compression: %u.%u%%\n",
4359 direction,
4360 c->bytes[1], c->packets[1],
4361 c->bytes[0], c->packets[0],
4362 total, r/10, r % 10);
4363}
4364
4365/* Since we are processing the bitfield from lower addresses to higher,
4366 it does not matter if the process it in 32 bit chunks or 64 bit
4367 chunks as long as it is little endian. (Understand it as byte stream,
4368 beginning with the lowest byte...) If we would use big endian
4369 we would need to process it from the highest address to the lowest,
4370 in order to be agnostic to the 32 vs 64 bits issue.
4371
4372 returns 0 on failure, 1 if we successfully received it. */
/* Receive the peer's complete bitmap as a sequence of P_BITMAP and/or
 * P_COMPRESSED_BITMAP packets.  Afterwards, react according to our
 * connection state: as bitmap target (C_WF_BITMAP_T) send our own
 * bitmap back and request the transition to C_WF_SYNC_UUID; as bitmap
 * source (C_WF_BITMAP_S) start the resync.
 * Returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;	/* transfer progress and statistics context */
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* Loop over bitmap packets.  The decode helpers return a positive
	 * value while more packets are expected, 0 when the whole bitmap
	 * has been received, and a negative value on error (semantics
	 * derived from the err checks below). */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			/* The compressed chunk must fit into the preallocated
			 * socket buffer (minus the already-consumed header). */
			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
				goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* Account plain vs. compressed traffic separately for the
		 * statistics printed by INFO_bm_xfer_stats() below. */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;	/* err == 0: bitmap complete */
		}
		/* More bitmap packets expected: read the next header. */
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	/* As bitmap source, kick off the resync only after the bitmap
	 * lock has been dropped. */
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}
4460
/* Handler for optional packets we do not understand: log a warning,
 * then consume the remaining payload so the receive stream stays in
 * sync with the packet framing. */
static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
		 pi->cmd, pi->size);

	return ignore_remaining_packet(connection, pi);
}
4468
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004469static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004470{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004471 /* Make sure we've acked all the TCP data associated
4472 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004473 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004474
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004475 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004476}
4477
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004478static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004479{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004480 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004481 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004482 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004483
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004484 peer_device = conn_peer_device(connection, pi->vnr);
4485 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004486 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004487 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004488
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004489 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004490 case C_WF_SYNC_UUID:
4491 case C_WF_BITMAP_T:
4492 case C_BEHIND:
4493 break;
4494 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004495 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004496 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004497 }
4498
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004499 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004500
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004501 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004502}
4503
/* One entry of the data-socket dispatch table (drbd_cmd_handler). */
struct data_cmd {
	int expect_payload;	/* non-zero: payload beyond pkt_size is legal */
	size_t pkt_size;	/* fixed (sub-)header size received before dispatch */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* packet handler */
};
4509
/*
 * Dispatch table for packets arriving on the data socket, indexed by
 * the packet command code.  drbdd() receives pkt_size bytes of fixed
 * sub-header before calling fn; entries with pkt_size == 0 read their
 * (variable sized) header inside the handler itself.  expect_payload
 * marks commands that may carry additional payload beyond pkt_size.
 */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};
4537
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004538static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004539{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004540 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004541 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004542 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004543
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004544 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004545 struct data_cmd *cmd;
4546
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004547 drbd_thread_current_set_cpu(&connection->receiver);
4548 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004549 goto err_out;
4550
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004551 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004552 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004553 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004554 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004555 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004556 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004557
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004558 shs = cmd->pkt_size;
4559 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004560 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004561 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004562 goto err_out;
4563 }
4564
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004565 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004566 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004567 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004568 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004569 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004570 }
4571
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004572 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004573 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004574 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004575 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004576 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004577 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004578 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004579 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004581 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004582 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004583}
4584
/* Tear down the network half of a connection after it was lost or
 * administratively shut down: stop the asender thread, free the
 * sockets, run per-volume cleanup, and finally move the connection to
 * C_UNCONNECTED (or on to C_STANDALONE if a disconnect was requested). */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;	/* cstate observed under req_lock below */
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->asender);
	drbd_free_sock(connection);

	/* Per-volume cleanup.  Hold a kref on each device so it cannot go
	 * away while we drop the RCU read lock around the (sleeping)
	 * drbd_disconnected() call. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If we are primary and the peer's disk state is not known good,
	 * try to outdate/fence the peer asynchronously. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	/* Read and possibly advance cstate under the request lock. */
	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4637
/* Per-volume cleanup after the connection to the peer was lost: wait
 * for in-flight peer requests to drain, cancel all resync activity,
 * flush the sender work queue, drop the peer's UUIDs, clear the
 * transfer log (unless suspended) and release pages still referenced
 * by the network layer.  Always returns 0. */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* Stop the resync timer, then run its callback once by hand so
	 * any pending timer-driven work is not simply dropped.
	 * NOTE(review): relies on resync_timer_fn being safe to call
	 * directly in this context — confirm against its definition. */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* Forget the peer's UUIDs; they will be received anew on the
	 * next handshake. */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	/* By now every peer-request list must be empty. */
	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4721
4722/*
4723 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4724 * we can agree on is stored in agreed_pro_version.
4725 *
4726 * feature flags and the reserved array should be enough room for future
4727 * enhancements of the handshake protocol, and possible plugins...
4728 *
4729 * for now, they are expected to be zero, but ignored.
4730 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004731static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004732{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004733 struct drbd_socket *sock;
4734 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004736 sock = &connection->data;
4737 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004738 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004739 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004740 memset(p, 0, sizeof(*p));
4741 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4742 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004743 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004744 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004745}
4746
4747/*
4748 * return values:
4749 * 1 yes, we have a valid connection
4750 * 0 oops, did not work out, please try again
4751 * -1 peer talks different language,
4752 * no point in trying again, please go standalone.
4753 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Send our features first, then expect the peer's in return. */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
		     expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* Convert in place; from here on the fields are host byte order. */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	/* protocol_max == 0: treat as "only protocol_min supported"
	 * (presumably sent by peers predating the max field — verify). */
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* Agree on the highest mutually supported version, and on the
	 * intersection of the announced feature flags. */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4814
4815#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CRYPTO_HMAC support: challenge/
 * response authentication cannot work, so refuse permanently.
 * Returns -1 ("auth failed, don't try again"). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* Fix message grammar: "was build" -> "was built". */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4822#else
4823#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004824
4825/* Return value:
4826 1 - auth succeeded,
4827 0 - failed, try again (network error),
4828 -1 - auth failed, don't try again.
4829*/
4830
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004831static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004832{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004833 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004834 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4835 struct scatterlist sg;
4836 char *response = NULL;
4837 char *right_response = NULL;
4838 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004839 unsigned int key_len;
4840 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004841 unsigned int resp_size;
4842 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004843 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004844 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004845 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004846
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004847 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4848
Philipp Reisner44ed1672011-04-19 17:10:19 +02004849 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004850 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02004851 key_len = strlen(nc->shared_secret);
4852 memcpy(secret, nc->shared_secret, key_len);
4853 rcu_read_unlock();
4854
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004855 desc.tfm = connection->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004856 desc.flags = 0;
4857
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004858 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004859 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004860 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004861 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862 goto fail;
4863 }
4864
4865 get_random_bytes(my_challenge, CHALLENGE_LEN);
4866
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004867 sock = &connection->data;
4868 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004869 rv = 0;
4870 goto fail;
4871 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004872 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004873 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004874 if (!rv)
4875 goto fail;
4876
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004877 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004878 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004879 rv = 0;
4880 goto fail;
4881 }
4882
Philipp Reisner77351055b2011-02-07 17:24:26 +01004883 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004884 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004885 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004886 rv = 0;
4887 goto fail;
4888 }
4889
Philipp Reisner77351055b2011-02-07 17:24:26 +01004890 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004891 drbd_err(connection, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004892 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004893 goto fail;
4894 }
4895
Philipp Reisner67cca282014-04-28 18:43:30 +02004896 if (pi.size < CHALLENGE_LEN) {
4897 drbd_err(connection, "AuthChallenge payload too small.\n");
4898 rv = -1;
4899 goto fail;
4900 }
4901
Philipp Reisner77351055b2011-02-07 17:24:26 +01004902 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004903 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004904 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004905 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004906 goto fail;
4907 }
4908
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004909 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004910 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004911 rv = 0;
4912 goto fail;
4913 }
4914
Philipp Reisner67cca282014-04-28 18:43:30 +02004915 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4916 drbd_err(connection, "Peer presented the same challenge!\n");
4917 rv = -1;
4918 goto fail;
4919 }
4920
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004921 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004922 response = kmalloc(resp_size, GFP_NOIO);
4923 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004924 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004925 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004926 goto fail;
4927 }
4928
4929 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004930 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004931
4932 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4933 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004934 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004935 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004936 goto fail;
4937 }
4938
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004939 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004940 rv = 0;
4941 goto fail;
4942 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004943 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004944 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004945 if (!rv)
4946 goto fail;
4947
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004948 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004949 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004950 rv = 0;
4951 goto fail;
4952 }
4953
Philipp Reisner77351055b2011-02-07 17:24:26 +01004954 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004955 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004956 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004957 rv = 0;
4958 goto fail;
4959 }
4960
Philipp Reisner77351055b2011-02-07 17:24:26 +01004961 if (pi.size != resp_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004962 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004963 rv = 0;
4964 goto fail;
4965 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004966
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004967 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004968 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004969 rv = 0;
4970 goto fail;
4971 }
4972
4973 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004974 if (right_response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004975 drbd_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004976 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004977 goto fail;
4978 }
4979
4980 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4981
4982 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4983 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004984 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004985 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004986 goto fail;
4987 }
4988
4989 rv = !memcmp(response, right_response, resp_size);
4990
4991 if (rv)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004992 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02004993 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004994 else
4995 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004996
4997 fail:
4998 kfree(peers_ch);
4999 kfree(response);
5000 kfree(right_response);
5001
5002 return rv;
5003}
5004#endif
5005
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02005006int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005007{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005008 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005009 int h;
5010
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005011 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005012
5013 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005014 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005015 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005016 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01005017 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005018 }
5019 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005020 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005021 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005022 }
5023 } while (h == 0);
5024
Philipp Reisner91fd4da2011-04-20 17:47:29 +02005025 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005026 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005027
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005028 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005029
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005030 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005031 return 0;
5032}
5033
5034/* ********* acknowledge sender ******** */
5035
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005036static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005037{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005038 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005039 int retcode = be32_to_cpu(p->retcode);
5040
5041 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005042 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005043 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005044 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005045 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005046 drbd_set_st_err_str(retcode), retcode);
5047 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005048 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005049
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005050 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005051}
5052
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005053static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005054{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005055 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005056 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005057 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005058 int retcode = be32_to_cpu(p->retcode);
5059
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005060 peer_device = conn_peer_device(connection, pi->vnr);
5061 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005062 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005063 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005064
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005065 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005066 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005067 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01005068 }
5069
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005070 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005071 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005072 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005073 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005074 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005075 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005076 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005077 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005078
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005079 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005080}
5081
/* Answer a keep-alive P_PING from the peer with a P_PING_ACK. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);

}
5087
/* The peer answered our ping: restore the normal idle receive timeout on
 * the meta socket and wake anyone waiting for the round trip to complete. */
static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}
5097
/* P_RS_IS_IN_SYNC: the peer determined (via checksum comparison) that a
 * resync block is already identical on both sides, so mark it in sync
 * locally without having transferred the data.
 * Returns 0, or -EIO if the volume number is unknown. */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* Checksum-based resync requires at least protocol version 89. */
	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Only touch on-disk resync state while we hold a local-disk ref. */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* Account the incoming resync traffic in 512-byte sectors. */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5127
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005128static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005129validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005130 struct rb_root *root, const char *func,
5131 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005132{
5133 struct drbd_request *req;
5134 struct bio_and_error m;
5135
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005136 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005137 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005138 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005139 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005140 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005141 }
5142 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005143 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005144
5145 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005146 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005147 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005148}
5149
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005150static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005151{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005152 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005153 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005154 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005155 sector_t sector = be64_to_cpu(p->sector);
5156 int blksize = be32_to_cpu(p->blksize);
5157 enum drbd_req_event what;
5158
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005159 peer_device = conn_peer_device(connection, pi->vnr);
5160 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005161 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005162 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005163
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005164 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005165
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005166 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005167 drbd_set_in_sync(device, sector, blksize);
5168 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005169 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005170 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005171 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005172 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005173 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005174 break;
5175 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005176 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005177 break;
5178 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005179 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005180 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005181 case P_SUPERSEDED:
5182 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005183 break;
5184 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005185 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005186 break;
5187 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005188 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005189 }
5190
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005191 return validate_req_change_req_state(device, p->block_id, sector,
5192 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005193 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005194}
5195
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005196static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005197{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005198 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005199 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005200 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005201 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005202 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005203 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005204
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005205 peer_device = conn_peer_device(connection, pi->vnr);
5206 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005207 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005208 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005209
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005210 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005211
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005212 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005213 dec_rs_pending(device);
5214 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005215 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005216 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005217
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005218 err = validate_req_change_req_state(device, p->block_id, sector,
5219 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005220 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005221 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005222 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5223 The master bio might already be completed, therefore the
5224 request is no longer in the collision hash. */
5225 /* In Protocol B we might already have got a P_RECV_ACK
5226 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005227 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005228 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005229 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005230}
5231
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005232static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005233{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005234 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005235 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005236 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005237 sector_t sector = be64_to_cpu(p->sector);
5238
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005239 peer_device = conn_peer_device(connection, pi->vnr);
5240 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005241 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005242 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005243
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005244 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005245
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005246 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005247 (unsigned long long)sector, be32_to_cpu(p->blksize));
5248
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005249 return validate_req_change_req_state(device, p->block_id, sector,
5250 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005251 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005252}
5253
/* P_NEG_RS_DREPLY / P_RS_CANCEL: the peer could not (or will not) serve a
 * resync read request. Complete the in-flight resync I/O; only the
 * NEG_RS_DREPLY case additionally marks the range as failed.
 * Returns 0, or -EIO if the volume number is unknown. */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	/* D_FAILED is sufficient: we only update resync bookkeeping here. */
	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5289
/* P_BARRIER_ACK: the peer confirmed a write barrier; release the
 * corresponding transfer-log epoch. For volumes that were running in
 * Ahead mode and have drained their in-flight application writes,
 * arm the timer that transitions them back to SyncSource. */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* test_and_set_bit ensures the resync timer is armed once. */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5313
/* P_OV_RESULT: the peer reports the outcome of one online-verify block.
 * Record out-of-sync findings, update verify progress, and when the last
 * block has been processed, queue the "verify finished" work (or finish
 * synchronously if that allocation fails).
 * Returns 0, or -EIO if the volume number is unknown. */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	/* Without a local disk there is no verify state left to update. */
	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			/* Allocation failed: finish inline instead of via the
			 * sender work queue. */
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}
5365
/* Packet types we deliberately ignore: consume and return success. */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
5370
/* Drain the done_ee lists of all volumes of this connection.
 * Loops until no volume has completed peer requests left.
 * Returns 0 on success, 1 if finishing peer requests failed for a volume.
 *
 * Note the locking dance: drbd_finish_peer_reqs() may sleep, so the RCU
 * read lock is dropped around it while a kref pins the device. */
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			/* Pin the device so it survives dropping the RCU lock. */
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		/* Re-check under the request lock whether new completions
		 * arrived while we were processing; if so, go around again. */
		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}
5407
/*
 * One entry of the meta-socket (asender) dispatch table.
 * @pkt_size: expected payload size following the header; drbd_asender()
 *            receives header + pkt_size bytes before dispatching.
 * @fn:       packet handler; returns 0 on success, non-zero to make
 *            drbd_asender() reconnect.
 */
struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5412
/* Meta-socket packet dispatch table, indexed by packet command number.
 * Commands outside the table, or entries with a NULL handler, are treated
 * as protocol errors by drbd_asender().  Several commands share handlers
 * (e.g. all block-ack variants go to got_BlockAck). */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005432
5433int drbd_asender(struct drbd_thread *thi)
5434{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005435 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005436 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005437 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005438 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005439 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005440 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005441 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005442 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005443 bool ping_timeout_active = false;
5444 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005445 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005446 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005447
Philipp Reisner3990e042013-03-27 14:08:48 +01005448 rv = sched_setscheduler(current, SCHED_RR, &param);
5449 if (rv < 0)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005450 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005451
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005452 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005453 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005454
5455 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005456 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005457 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005458 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005459 ping_int = nc->ping_int;
5460 rcu_read_unlock();
5461
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005462 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5463 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005464 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005465 goto reconnect;
5466 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005467 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005468 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005469 }
5470
Philipp Reisner32862ec2011-02-08 16:41:01 +01005471 /* TODO: conditionally cork; it may hurt latency if we cork without
5472 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005473 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005474 drbd_tcp_cork(connection->meta.socket);
5475 if (connection_finish_peer_reqs(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005476 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005477 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005478 }
5479 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005480 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005481 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005482
5483 /* short circuit, recv_msg would return EINTR anyways. */
5484 if (signal_pending(current))
5485 continue;
5486
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005487 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5488 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005489
5490 flush_signals(current);
5491
5492 /* Note:
5493 * -EINTR (on meta) we got a signal
5494 * -EAGAIN (on meta) rcvtimeo expired
5495 * -ECONNRESET other side closed the connection
5496 * -ERESTARTSYS (on data) we got a signal
5497 * rv < 0 other than above: unexpected error!
5498 * rv == expected: full header or command
5499 * rv < expected: "woken" by signal during receive
5500 * rv == 0 : "connection shut down by peer"
5501 */
5502 if (likely(rv > 0)) {
5503 received += rv;
5504 buf += rv;
5505 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005506 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005507 long t;
5508 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005509 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005510 rcu_read_unlock();
5511
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005512 t = wait_event_timeout(connection->ping_wait,
5513 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005514 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005515 if (t)
5516 break;
5517 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005518 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005519 goto reconnect;
5520 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005521 /* If the data socket received something meanwhile,
5522 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005523 if (time_after(connection->last_received,
5524 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005525 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005526 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005527 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005528 goto reconnect;
5529 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005530 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005531 continue;
5532 } else if (rv == -EINTR) {
5533 continue;
5534 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005535 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005536 goto reconnect;
5537 }
5538
5539 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005540 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005541 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005542 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005543 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005544 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005545 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005546 goto disconnect;
5547 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005548 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005549 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005550 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005551 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005552 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005553 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005554 }
5555 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005556 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005557
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005558 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005559 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005560 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005561 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005562 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005563
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005564 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005565
Philipp Reisner44ed1672011-04-19 17:10:19 +02005566 if (cmd == &asender_tbl[P_PING_ACK]) {
5567 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005568 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005569 ping_timeout_active = false;
5570 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005571
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005572 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005573 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005574 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005575 cmd = NULL;
5576 }
5577 }
5578
5579 if (0) {
5580reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005581 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5582 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005583 }
5584 if (0) {
5585disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005586 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005587 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005588 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005589
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005590 drbd_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005591
5592 return 0;
5593}