blob: 125c9e89388fa9faab4e58a04434674bf47408b3 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
49
50#include "drbd_vli.h"
51
/* Decoded header of one packet received from the peer: the command,
 * the payload size still to be read, the volume number it addresses,
 * and a pointer to the already-received header payload. */
struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;	/* presumably the volume number — confirm against header format */
	void *data;
};
58
/* Result of trying to finish a write epoch (see drbd_may_finish_epoch).
 * NOTE(review): exact semantics of each value are defined by the epoch
 * state machine elsewhere in this file — confirm before relying on them. */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch not finished yet */
	FE_DESTROYED,	/* epoch object was freed */
	FE_RECYCLED,	/* epoch object was reused */
};
64
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020065static int drbd_do_features(struct drbd_connection *connection);
66static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020067static int drbd_disconnected(struct drbd_peer_device *);
Philipp Reisnerb411b362009-09-25 16:07:19 -070068
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020069static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010070static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070071
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
73#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
74
Lars Ellenberg45bb9122010-05-14 17:10:48 +020075/*
76 * some helper functions to deal with single linked page lists,
77 * page->private being our "next" pointer.
78 */
79
/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 *
 * On success the returned sub-chain is NUL-terminated (page_private of its
 * last page set to 0) and *head is advanced past the removed pages. */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n pages forward; "page" ends on the n-th page and "tmp"
	 * on its successor (possibly NULL). */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
114
/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock.
 * Optionally reports the chain length via *len. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
128
129static int page_chain_free(struct page *page)
130{
131 struct page *tmp;
132 int i = 0;
133 page_chain_for_each_safe(page, tmp) {
134 put_page(page);
135 ++i;
136 }
137 return i;
138}
139
/* Prepend the chain [chain_first .. chain_last] onto *head.
 * Caller provides locking; chain_last must really be the tail of the
 * chain starting at chain_first (cheap sanity check below). */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* debug aid: verify the caller-supplied tail is the actual tail */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
153
/* Try to grab @number pages, first from the global drbd_pp_pool,
 * falling back to alloc_page(GFP_TRY).  Returns a page chain linked
 * via page->private, or NULL if not all pages were available right
 * now; partially allocated pages are donated to the pool in that case. */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
199
/* Move fully-sent peer requests from device->net_ee onto @to_be_freed.
 * Caller must hold resource->req_lock (see drbd_kick_lo_and_reclaim_net). */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
216
/* Collect finished net_ee requests under the req_lock, then free them
 * outside the lock (drbd_free_net_peer_req may release pages). */
static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}
229
/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device (the device is @peer_device->device).
 * @number:	number of pages requested
 * @retry:	whether to retry, if not enough pages are available right now
 *
 * Tries to allocate number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;	/* no net_conf: effectively unlimited */
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	/* Slow path: wait (interruptibly) for pages to be freed elsewhere. */
	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the whole chain against this device's in-use counter */
	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
288
/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool (if the pool is
 * below its high-water mark), or returns all pages to the system.
 * @is_net selects which in-use counter to decrement. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	/* going negative means an accounting bug somewhere else */
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}
317
318/*
319You need to hold the req_lock:
320 _drbd_wait_ee_list_empty()
321
322You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200323 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200324 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200325 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700326 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200327 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700328 drbd_clear_done_ee()
329 drbd_wait_ee_list_empty()
330*/
331
/* Allocate a peer request object plus (if @data_size != 0) a page chain
 * large enough to hold the payload.  Returns NULL on allocation failure
 * or injected fault.  The caller owns the returned object and must free
 * it with drbd_free_peer_req(). */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* fault-injection hook for testing allocation-failure paths */
	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	/* mempool pages must not be highmem */
	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		/* retry (block) only if the caller allowed waiting */
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
380
/* Release a peer request: optional digest, its page chain (accounted
 * against the counter selected by @is_net), and the object itself.
 * Must only be called once all bios on it have completed. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
391
/* Detach all entries of @list under the req_lock, then free them outside
 * the lock.  Returns the number of requests freed.  Whether the "net"
 * accounting is used is inferred from the list identity. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
409
/*
 * Run the completion callback of every request on device->done_ee and
 * free them; also reclaims finished net_ee entries on the way.
 * Returns 0, or the first non-zero callback result (later callbacks are
 * still run, and are told via their second argument that an error
 * already occurred).
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;	/* remember only the first error */
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
445
/* Wait (uninterruptibly) until @head becomes empty.
 * Caller must hold resource->req_lock; it is dropped around the sleep
 * and re-acquired before re-checking, and is held again on return. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
461
/* Locked wrapper around _drbd_wait_ee_list_empty() for callers that do
 * not already hold resource->req_lock. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
469
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100470static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700471{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700472 struct kvec iov = {
473 .iov_base = buf,
474 .iov_len = size,
475 };
476 struct msghdr msg = {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700477 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
478 };
Al Virof730c842014-02-08 21:07:38 -0500479 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700480}
481
/* Receive @size bytes on the data socket, logging errors/EOF and moving
 * the connection to C_BROKEN_PIPE on a short result — unless the EOF was
 * expected because we sent a disconnect ourselves (DISCONNECT_SENT).
 * Returns bytes received (possibly short), 0 on EOF, or negative errno. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;

			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			/* give the state machine time to leave WFReportParams;
			 * if it does, this EOF is the expected teardown */
			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
514
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200515static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100516{
517 int err;
518
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200519 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100520 if (err != size) {
521 if (err >= 0)
522 err = -EIO;
523 } else
524 err = 0;
525 return err;
526}
527
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200528static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100529{
530 int err;
531
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200532 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100533 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200534 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100535 return err;
536}
537
/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 * A size of 0 means "leave the kernel default alone". */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
		unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF: set the size and lock it so
	 * auto-tuning does not override the admin's choice */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
556
/* Actively attempt one outgoing TCP connection to the configured peer.
 * Returns the connected socket, or NULL on failure.  "Soft" failures
 * (timeout, refused, unreachable, signal) keep us in C_WF_CONNECTION so
 * the caller can retry; unexpected errors force C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;	/* names the failing step for the error log */
	struct socket *sock;
	struct sockaddr_in6 src_in6;	/* big enough for v4 and v6 */
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* snapshot the tunables under RCU; net_conf may vanish concurrently */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	/* NOTE(review): bounds against sizeof(src_in6), not sizeof(peer_in6);
	 * harmless since both are sockaddr_in6, but confirm intent */
	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
644
/* Context handed to the listen socket's sk_state_change callback so an
 * incoming peer connection can wake up the receiver thread waiting in
 * drbd_wait_for_connect(). */
struct accept_wait_data {
	struct drbd_connection *connection;	/* connection this listener serves */
	struct socket *s_listen;		/* the listening socket */
	struct completion door_bell;		/* completed once a peer has connected */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback; restored by unregister_state_change() */

};
652
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200653static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700654{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200655 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200656 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200657
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200658 state_change = ad->original_sk_state_change;
659 if (sk->sk_state == TCP_ESTABLISHED)
660 complete(&ad->door_bell);
661 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200662}
663
/* Create, bind and listen() on the socket we accept peer connections on,
 * and hook drbd_incoming_connection() into its sk_state_change so that
 * drbd_wait_for_connect() gets woken on an incoming connection.
 * On success *ad owns the listen socket; returns 0.
 * On failure returns -EIO (and may move the connection to C_DISCONNECTING
 * for errors other than EAGAIN/EINTR/ERESTARTSYS).
 * NOTE(review): on the "listen" failure path ad->s_listen is left pointing
 * at the just-released socket; the caller (conn_connect) bails out on a
 * non-zero return without touching it — confirm no other caller reuses ad. */
static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;	/* names the failing step for the error message */

	/* Snapshot the tunables under RCU; net_conf may be replaced or gone. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	/* Copy the configured local address; sockaddr_in6 is large enough
	 * for both the IPv4 and IPv6 representation. */
	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	/* Publish the socket to *ad and swap in our state-change callback
	 * under sk_callback_lock, saving the original for later restore. */
	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		/* transient errors are expected while the peer is absent;
		 * only hard errors tear the connection down */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}
726
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200727static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
728{
729 write_lock_bh(&sk->sk_callback_lock);
730 sk->sk_state_change = ad->original_sk_state_change;
731 sk->sk_user_data = NULL;
732 write_unlock_bh(&sk->sk_callback_lock);
733}
734
/* Wait (with randomized timeout) for the peer to connect to our listen
 * socket and accept that connection.
 * Returns the established socket, or NULL on timeout/interruption/error. */
static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	/* net_conf is RCU-protected and may disappear under us */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	/* i.e. +/- timeo/7, a total spread of 2/7 ~ 28.5%, so both nodes
	 * don't keep retrying in lock-step */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	/* door_bell is completed by drbd_incoming_connection() */
	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		/* only hard errors tear the connection down */
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	/* the accepted socket inherited our sk_state_change; restore it */
	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}
771
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200772static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700773
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200774static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200775 enum drbd_packet cmd)
776{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200777 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200778 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200779 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700780}
781
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700783{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200784 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200785 struct packet_info pi;
786 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700787
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200788 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200789 if (err != header_size) {
790 if (err >= 0)
791 err = -EIO;
792 return err;
793 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200794 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200795 if (err)
796 return err;
797 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700798}
799
800/**
801 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700802 * @sock: pointer to the pointer to the socket.
803 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100804static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700805{
806 int rr;
807 char tb[4];
808
809 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100810 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700811
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100812 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700813
814 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100815 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700816 } else {
817 sock_release(*sock);
818 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100819 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700820 }
821}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100822/* Gets called if a connection is established, or if a new minor gets created
823 in a connection */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200824int drbd_connected(struct drbd_peer_device *peer_device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100825{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200826 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100827 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100828
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200829 atomic_set(&device->packet_seq, 0);
830 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100831
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200832 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
833 &peer_device->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200834 &device->own_state_mutex;
Philipp Reisner8410da82011-02-11 20:11:10 +0100835
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200836 err = drbd_send_sync_param(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100837 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200838 err = drbd_send_sizes(peer_device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100839 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200840 err = drbd_send_uuids(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100841 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200842 err = drbd_send_current_state(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200843 clear_bit(USE_DEGR_WFC_T, &device->flags);
844 clear_bit(RESIZE_PENDING, &device->flags);
845 atomic_set(&device->ap_in_flight, 0);
846 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100847 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100848}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700849
/*
 * conn_connect() - establish the pair of TCP connections to the peer
 * and run the initial handshake.
 *
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;	/* data socket / meta-data socket */
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Both nodes connect actively AND accept passively, until one
	 * data socket and one meta socket exist and look healthy. */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			/* first outgoing connection becomes the data socket,
			 * the second one the meta socket */
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			/* give crossed connection attempts time to settle */
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		/* accept what the peer may have initiated towards us */
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected at once; keep the
					 * accepted one, drop our own attempt */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* break symmetry between the two nodes */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	/* done listening; both sockets are established */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	/* negotiate the protocol version */
	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	/* per-volume handshake; drop the RCU lock around drbd_connected()
	 * (which may sleep) while a kref pins the device */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1088
/* Decode an on-the-wire packet header into @pi.  The expected header
 * layout is determined by the agreed protocol version (via
 * drbd_header_size()) and cross-checked against the magic value:
 * p_header100 (protocol >= 100, carries a volume number),
 * p_header95 (32-bit length) or p_header80 (16-bit length).
 * Returns 0 on success, -EINVAL on a malformed or mismatched header. */
static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
{
	unsigned int header_size = drbd_header_size(connection);

	if (header_size == sizeof(struct p_header100) &&
	    *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
		struct p_header100 *h = header;
		/* the pad field must be zero in the 100 format */
		if (h->pad != 0) {
			drbd_err(connection, "Header padding is not zero\n");
			return -EINVAL;
		}
		pi->vnr = be16_to_cpu(h->volume);
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
	} else if (header_size == sizeof(struct p_header95) &&
		   *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
		struct p_header95 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be32_to_cpu(h->length);
		pi->vnr = 0;	/* pre-100 protocols know only volume 0 */
	} else if (header_size == sizeof(struct p_header80) &&
		   *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
		struct p_header80 *h = header;
		pi->cmd = be16_to_cpu(h->command);
		pi->size = be16_to_cpu(h->length);
		pi->vnr = 0;
	} else {
		drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
			 be32_to_cpu(*(__be32 *)header),
			 connection->agreed_pro_version);
		return -EINVAL;
	}
	/* payload follows the header in the same buffer */
	pi->data = header + header_size;
	return 0;
}
1124
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001125static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001126{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001127 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001128 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001129
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001130 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001131 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001132 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001133
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001134 err = decode_header(connection, buffer, pi);
1135 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001136
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001137 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138}
1139
/* Flush the backing devices of all volumes of @connection, if the current
 * write ordering policy requires it.  On the first flush failure the
 * write ordering is demoted to WO_drain_io and iteration stops. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			/* skip volumes without an attached local disk */
			if (!get_ldev(device))
				continue;
			/* pin the device and drop the RCU lock:
			 * blkdev_issue_flush() may sleep */
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
						GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1175
1176/**
1177 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001178 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 * @epoch: Epoch object.
1180 * @ev: Epoch event.
1181 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001182static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001183 struct drbd_epoch *epoch,
1184 enum epoch_event ev)
1185{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001186 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001188 enum finish_epoch rv = FE_STILL_LIVE;
1189
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001190 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 do {
1192 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001193
1194 epoch_size = atomic_read(&epoch->epoch_size);
1195
1196 switch (ev & ~EV_CLEANUP) {
1197 case EV_PUT:
1198 atomic_dec(&epoch->active);
1199 break;
1200 case EV_GOT_BARRIER_NR:
1201 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001202 break;
1203 case EV_BECAME_LAST:
1204 /* nothing to do*/
1205 break;
1206 }
1207
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 if (epoch_size != 0 &&
1209 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001210 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001211 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001212 spin_unlock(&connection->epoch_lock);
1213 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1214 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001215 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001216#if 0
1217 /* FIXME: dec unacked on connection, once we have
1218 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001219 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001220 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001221#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001222
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001223 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001224 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1225 list_del(&epoch->list);
1226 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001227 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001228 kfree(epoch);
1229
1230 if (rv == FE_STILL_LIVE)
1231 rv = FE_DESTROYED;
1232 } else {
1233 epoch->flags = 0;
1234 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001235 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 if (rv == FE_STILL_LIVE)
1237 rv = FE_RECYCLED;
1238 }
1239 }
1240
1241 if (!next_epoch)
1242 break;
1243
1244 epoch = next_epoch;
1245 } while (1);
1246
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001247 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001248
Philipp Reisnerb411b362009-09-25 16:07:19 -07001249 return rv;
1250}
1251
1252/**
1253 * drbd_bump_write_ordering() - Fall back to an other write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001254 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255 * @wo: Write ordering method to try.
1256 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001257void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001259 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001260 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001261 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001262 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001263 static char *write_ordering_str[] = {
1264 [WO_none] = "none",
1265 [WO_drain_io] = "drain",
1266 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267 };
1268
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001269 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001270 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001271 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001272 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1273 struct drbd_device *device = peer_device->device;
1274
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001275 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001276 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001277 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001278
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001279 if (wo == WO_bdev_flush && !dc->disk_flushes)
1280 wo = WO_drain_io;
1281 if (wo == WO_drain_io && !dc->disk_drain)
1282 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001283 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001284 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001285 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001286 connection->write_ordering = wo;
1287 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001288 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001289}
1290
1291/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001292 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001293 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001294 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001295 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001296 *
1297 * May spread the pages to multiple bios,
1298 * depending on bio_add_page restrictions.
1299 *
1300 * Returns 0 if all bios have been submitted,
1301 * -ENOMEM if we could not allocate enough bios,
1302 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1303 * single page to an empty bio (which should never happen and likely indicates
1304 * that the lower level IO stack is in some way broken). This has been observed
1305 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001306 */
1307/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001308int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001309 struct drbd_peer_request *peer_req,
1310 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001311{
1312 struct bio *bios = NULL;
1313 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001314 struct page *page = peer_req->pages;
1315 sector_t sector = peer_req->i.sector;
1316 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001317 unsigned n_bios = 0;
1318 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001319 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001320
1321 /* In most cases, we will only need one bio. But in case the lower
1322 * level restrictions happen to be different at this offset on this
1323 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001324 * request in more than one bio.
1325 *
1326 * Plain bio_alloc is good enough here, this is no DRBD internally
1327 * generated bio, but a bio allocated on behalf of the peer.
1328 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001329next_bio:
1330 bio = bio_alloc(GFP_NOIO, nr_pages);
1331 if (!bio) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001332 drbd_err(device, "submit_ee: Allocation of a bio failed\n");
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001333 goto fail;
1334 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001335 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001336 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001337 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001338 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001339 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001340 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001341
1342 bio->bi_next = bios;
1343 bios = bio;
1344 ++n_bios;
1345
1346 page_chain_for_each(page) {
1347 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1348 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001349 /* A single page must always be possible!
1350 * But in case it fails anyways,
1351 * we deal with it, and complain (below). */
1352 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001353 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001354 "bio_add_page failed for len=%u, "
1355 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001356 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001357 err = -ENOSPC;
1358 goto fail;
1359 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001360 goto next_bio;
1361 }
1362 ds -= len;
1363 sector += len >> 9;
1364 --nr_pages;
1365 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001366 D_ASSERT(device, page == NULL);
1367 D_ASSERT(device, ds == 0);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001368
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001369 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001370 do {
1371 bio = bios;
1372 bios = bios->bi_next;
1373 bio->bi_next = NULL;
1374
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001375 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001376 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001377 return 0;
1378
1379fail:
1380 while (bios) {
1381 bio = bios;
1382 bios = bios->bi_next;
1383 bio_put(bio);
1384 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001385 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001386}
1387
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001388static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001389 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001390{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001391 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001392
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001393 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001394 drbd_clear_interval(i);
1395
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001396 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001397 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001398 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001399}
1400
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001401static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001402{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001403 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001404 int vnr;
1405
1406 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001407 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1408 struct drbd_device *device = peer_device->device;
1409
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001410 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001411 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001412 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001413 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001414 rcu_read_lock();
1415 }
1416 rcu_read_unlock();
1417}
1418
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001419static struct drbd_peer_device *
1420conn_peer_device(struct drbd_connection *connection, int volume_number)
1421{
1422 return idr_find(&connection->peer_devices, volume_number);
1423}
1424
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001425static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001427 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001428 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001429 struct drbd_epoch *epoch;
1430
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001431 /* FIXME these are unacked on connection,
1432 * not a specific (peer)device.
1433 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001434 connection->current_epoch->barrier_nr = p->barrier;
1435 connection->current_epoch->connection = connection;
1436 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001437
1438 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1439 * the activity log, which means it would not be resynced in case the
1440 * R_PRIMARY crashes now.
1441 * Therefore we must send the barrier_ack after the barrier request was
1442 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001443 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001444 case WO_none:
1445 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001446 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001447
1448 /* receiver context, in the writeout path of the other node.
1449 * avoid potential distributed deadlock */
1450 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1451 if (epoch)
1452 break;
1453 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001454 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001455 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001456
1457 case WO_bdev_flush:
1458 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001459 conn_wait_active_ee_empty(connection);
1460 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001461
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001462 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001463 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1464 if (epoch)
1465 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466 }
1467
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001468 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001469 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001470 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001471 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001472 }
1473
1474 epoch->flags = 0;
1475 atomic_set(&epoch->epoch_size, 0);
1476 atomic_set(&epoch->active, 0);
1477
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001478 spin_lock(&connection->epoch_lock);
1479 if (atomic_read(&connection->current_epoch->epoch_size)) {
1480 list_add(&epoch->list, &connection->current_epoch->list);
1481 connection->current_epoch = epoch;
1482 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001483 } else {
1484 /* The current_epoch got recycled while we allocated this one... */
1485 kfree(epoch);
1486 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001487 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001488
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001489 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001490}
1491
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Read one data block of @data_size bytes (plus an optional leading integrity
 * digest) from the socket into a freshly allocated peer request at @sector.
 * Returns the peer request, or NULL on receive error, allocation failure,
 * invalid size, out-of-range sector, or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      int data_size) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;

	/* If data integrity checking is enabled, the digest precedes the
	 * payload on the wire; receive it first and subtract its size. */
	dgs = 0;
	if (peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 * here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* Zero-sized requests (e.g. trim-like) carry no payload pages. */
	if (!data_size)
		return peer_req;

	/* Receive the payload page by page into the peer request's pages. */
	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* Verify the received data against the digest sent by the peer. */
	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
1575
1576/* drbd_drain_block() just takes a data block
1577 * out of the socket input buffer, and discards it.
1578 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001579static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580{
1581 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001582 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001583 void *data;
1584
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001585 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001586 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001587
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001588 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001589
1590 data = kmap(page);
1591 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001592 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1593
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001594 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001595 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001596 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001597 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001598 }
1599 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001600 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001601 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602}
1603
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001604static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001605 sector_t sector, int data_size)
1606{
Kent Overstreet79886132013-11-23 17:19:00 -08001607 struct bio_vec bvec;
1608 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001609 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001610 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001611 void *dig_in = peer_device->connection->int_dig_in;
1612 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001614 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001615 if (peer_device->connection->peer_integrity_tfm) {
1616 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1617 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001618 if (err)
1619 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001620 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621 }
1622
Philipp Reisnerb411b362009-09-25 16:07:19 -07001623 /* optimistically update recv_cnt. if receiving fails below,
1624 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001625 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001626
1627 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001628 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001629
Kent Overstreet79886132013-11-23 17:19:00 -08001630 bio_for_each_segment(bvec, bio, iter) {
1631 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1632 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001633 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001634 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001635 if (err)
1636 return err;
1637 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001638 }
1639
1640 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001641 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001642 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001643 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001644 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001645 }
1646 }
1647
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001648 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001649 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001650}
1651
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001652/*
1653 * e_end_resync_block() is called in asender context via
1654 * drbd_finish_peer_reqs().
1655 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001656static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001657{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001658 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001659 container_of(w, struct drbd_peer_request, w);
1660 struct drbd_peer_device *peer_device = peer_req->peer_device;
1661 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001662 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001663 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001665 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001666
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001667 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001668 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001669 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001670 } else {
1671 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001672 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001673
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001674 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001675 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001676 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001678 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001679}
1680
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001681static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
1682 int data_size) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001684 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001685 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001686
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001687 peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001688 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001689 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001690
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001691 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001692
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001693 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001694 /* corresponding dec_unacked() in e_end_resync_block()
1695 * respective _drbd_clear_done_ee */
1696
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001697 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001698
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001699 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001700 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001701 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001702
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001703 atomic_add(data_size >> 9, &device->rs_sect_ev);
1704 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001705 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001706
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001707 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001708 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001709 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001710 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001711 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001712
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001713 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001714fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001715 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001716 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001717}
1718
/* Look up the pending drbd_request that the peer's @id refers to.
 *
 * @id is the block_id we originally sent with the request, which is the
 * kernel address of the drbd_request itself ("Request object according to
 * our peer").  Before trusting the echoed-back pointer, verify that the
 * corresponding interval is actually registered in @root at @sector and
 * belongs to a local request.
 *
 * Returns the request, or NULL if it cannot be validated; unless
 * @missing_ok, the failure is logged with @func naming the caller.
 */
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
1735
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001736static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001737{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001738 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001739 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001740 struct drbd_request *req;
1741 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001742 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001743 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001744
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001745 peer_device = conn_peer_device(connection, pi->vnr);
1746 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001747 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001748 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001749
1750 sector = be64_to_cpu(p->sector);
1751
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001752 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001753 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001754 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001755 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001756 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001757
Bart Van Assche24c48302011-05-21 18:32:29 +02001758 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001759 * special casing it there for the various failure cases.
1760 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001761 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001762 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001763 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001764 /* else: nothing. handled from drbd_disconnect...
1765 * I don't think we may complete this just yet
1766 * in case we are "on-disconnect: freeze" */
1767
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001768 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001769}
1770
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001771static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001773 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001774 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001775 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001776 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001777 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001778
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001779 peer_device = conn_peer_device(connection, pi->vnr);
1780 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001781 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001782 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783
1784 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001785 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001786
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001787 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 /* data is submitted to disk within recv_resync_read.
1789 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001790 * or in drbd_peer_request_endio. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001791 err = recv_resync_read(peer_device, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001792 } else {
1793 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001794 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001795
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001796 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001797
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001798 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001799 }
1800
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001801 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001802
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001803 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001804}
1805
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001806static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001807 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001809 struct drbd_interval *i;
1810 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001812 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001813 if (!i->local)
1814 continue;
1815 req = container_of(i, struct drbd_request, i);
1816 if (req->rq_state & RQ_LOCAL_PENDING ||
1817 !(req->rq_state & RQ_POSTPONED))
1818 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001819 /* as it is RQ_POSTPONED, this will cause it to
1820 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001821 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001822 }
1823}
1824
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001825/*
1826 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827 */
/* Completion callback for a mirrored write (peer request).
 *
 * Sends the appropriate ack for the written block, then removes the
 * request from the conflict-detection interval tree and closes out its
 * epoch reference.  Runs in asender context via drbd_finish_peer_reqs()
 * (see comment above); @cancel indicates cleanup after connection loss.
 * Returns the drbd_send_ack() result (0 on success).
 */
static int e_end_block(struct drbd_work *w, int cancel)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err = 0, pcmd;

	if (peer_req->flags & EE_SEND_WRITE_ACK) {
		if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
			/* During resync states, a successful write may also
			 * mark the block in sync (P_RS_WRITE_ACK). */
			pcmd = (device->state.conn >= C_SYNC_SOURCE &&
				device->state.conn <= C_PAUSED_SYNC_T &&
				peer_req->flags & EE_MAY_SET_IN_SYNC) ?
				P_RS_WRITE_ACK : P_WRITE_ACK;
			err = drbd_send_ack(peer_device, pcmd, peer_req);
			if (pcmd == P_RS_WRITE_ACK)
				drbd_set_in_sync(device, sector, peer_req->i.size);
		} else {
			/* Local write failed: tell the peer via P_NEG_ACK. */
			err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
			/* we expect it to be marked out of sync anyways...
			 * maybe assert this? */
		}
		dec_unacked(device);
	}
	/* we delete from the conflict detection hash _after_ we sent out the
	 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
	if (peer_req->flags & EE_IN_INTERVAL_TREE) {
		spin_lock_irq(&device->resource->req_lock);
		D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
		drbd_remove_epoch_entry_interval(device, peer_req);
		if (peer_req->flags & EE_RESTART_REQUESTS)
			restart_conflicting_writes(device, sector, peer_req->i.size);
		spin_unlock_irq(&device->resource->req_lock);
	} else
		D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	/* Drop our epoch reference; on cancel also clean the epoch up. */
	drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));

	return err;
}
1869
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001870static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001871{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001872 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001873 container_of(w, struct drbd_peer_request, w);
1874 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001875 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001876
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001877 err = drbd_send_ack(peer_device, ack, peer_req);
1878 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001879
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001880 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001881}
1882
/* Work callback: ack a conflicting peer write with P_SUPERSEDED, i.e. its
 * data was fully covered by an overlapping request and discarded (set up
 * in handle_write_conflicts()).  @unused satisfies the callback signature. */
static int e_send_superseded(struct drbd_work *w, int unused)
{
	return e_send_ack(w, P_SUPERSEDED);
}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001887
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001888static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001889{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001890 struct drbd_peer_request *peer_req =
1891 container_of(w, struct drbd_peer_request, w);
1892 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001893
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001894 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001895 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001896}
1897
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001898static bool seq_greater(u32 a, u32 b)
1899{
1900 /*
1901 * We assume 32-bit wrap-around here.
1902 * For 24-bit wrap-around, we would have to shift:
1903 * a <<= 8; b <<= 8;
1904 */
1905 return (s32)a - (s32)b > 0;
1906}
1907
1908static u32 seq_max(u32 a, u32 b)
1909{
1910 return seq_greater(a, b) ? a : b;
1911}
1912
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001913static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001914{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001915 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001916 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001917
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001918 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001919 spin_lock(&device->peer_seq_lock);
1920 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1921 device->peer_seq = newest_peer_seq;
1922 spin_unlock(&device->peer_seq_lock);
1923 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001924 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001925 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001926 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001927}
1928
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001929static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1930{
1931 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1932}
1933
1934/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001935static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001936{
1937 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001938 bool rv = 0;
1939
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001940 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001941 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001942 if (overlaps(peer_req->i.sector, peer_req->i.size,
1943 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001944 rv = 1;
1945 break;
1946 }
1947 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001948 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001949
1950 return rv;
1951}
1952
Philipp Reisnerb411b362009-09-25 16:07:19 -07001953/* Called from receive_Data.
1954 * Synchronize packets on sock with packets on msock.
1955 *
1956 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1957 * packet traveling on msock, they are still processed in the order they have
1958 * been sent.
1959 *
1960 * Note: we don't care for Ack packets overtaking P_DATA packets.
1961 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001962 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001963 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001964 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07001965 * ourselves. Correctly handles 32bit wrap around.
1966 *
1967 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1968 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1969 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1970 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
1971 *
1972 * returns 0 if we may process the packet,
1973 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
/* See the block comment above for the full protocol rationale.
 * Returns 0 when the packet may be processed, -ERESTARTSYS on signal,
 * -ETIMEDOUT when the missing acks never arrived. */
static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
{
	struct drbd_device *device = peer_device->device;
	DEFINE_WAIT(wait);
	long timeout;
	int ret = 0, tp;

	/* Sequence ordering only matters on the conflict-resolving side. */
	if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
		return 0;

	spin_lock(&device->peer_seq_lock);
	for (;;) {
		/* We are the logically next packet (wrap-around aware):
		 * record the new sequence number and proceed. */
		if (!seq_greater(peer_seq - 1, device->peer_seq)) {
			device->peer_seq = seq_max(device->peer_seq, peer_seq);
			break;
		}

		if (signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		rcu_read_lock();
		tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
		rcu_read_unlock();

		if (!tp)
			break;

		/* Only need to wait if two_primaries is enabled */
		prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
		spin_unlock(&device->peer_seq_lock);
		/* net_conf is RCU-protected; re-read the timeout each pass. */
		rcu_read_lock();
		timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
		rcu_read_unlock();
		timeout = schedule_timeout(timeout);
		spin_lock(&device->peer_seq_lock);
		if (!timeout) {
			ret = -ETIMEDOUT;
			drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
			break;
		}
	}
	spin_unlock(&device->peer_seq_lock);
	finish_wait(&device->seq_wait, &wait);
	return ret;
}
2021
Lars Ellenberg688593c2010-11-17 22:25:03 +01002022/* see also bio_flags_to_wire()
2023 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2024 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002025static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002026{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002027 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2028 (dpf & DP_FUA ? REQ_FUA : 0) |
2029 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2030 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002031}
2032
/* Fail (NEG_ACK) every postponed local write overlapping
 * [sector, sector + size).
 *
 * Called with device->resource->req_lock held (see handle_write_conflicts).
 * The lock is dropped around complete_master_bio(), so the overlap scan
 * must restart from scratch each time ("goto repeat"). */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		/* Only local, postponed requests are affected. */
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		/* complete_master_bio() must run without the request lock. */
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
2057
/* Resolve conflicts between an incoming peer write and overlapping
 * requests in the write_requests interval tree.
 *
 * Called from receive_Data() with device->resource->req_lock held (the
 * lock may be dropped transiently inside drbd_wait_misc()).
 *
 * Returns:
 *   0       - no unresolved conflict; caller may submit @peer_req
 *   -ENOENT - @peer_req was superseded/retried; an ack callback has been
 *             queued on done_ee and the caller must not submit it
 *   other   - error from drbd_wait_misc(); interval already removed
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	/* True when this node is responsible for deciding conflicts. */
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* Queue the ack (P_SUPERSEDED or P_RETRY_WRITE) to be
			 * sent by the asender; dec_unacked happens there. */
			inc_unacked(device);
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			wake_asender(connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					/* Waiting failed: force disconnect and
					 * fail all postponed overlaps. */
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
2164
Philipp Reisnerb411b362009-09-25 16:07:19 -07002165/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002166static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002167{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002168 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002169 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002170 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002171 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002172 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002173 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002174 int rw = WRITE;
2175 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002176 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002177
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002178 peer_device = conn_peer_device(connection, pi->vnr);
2179 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002180 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002181 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002182
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002183 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002184 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002185
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002186 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2187 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002188 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002189 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002190 if (!err)
2191 err = err2;
2192 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002193 }
2194
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002195 /*
2196 * Corresponding put_ldev done either below (on various errors), or in
2197 * drbd_peer_request_endio, if we successfully submit the data at the
2198 * end of this function.
2199 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002200
2201 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002202 peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002203 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002204 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002205 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206 }
2207
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002208 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002209
Lars Ellenberg688593c2010-11-17 22:25:03 +01002210 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002211 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberg81a35372012-07-30 09:00:54 +02002212 if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002213 D_ASSERT(device, peer_req->i.size == 0);
2214 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002215 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002216
2217 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002218 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002219
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002220 spin_lock(&connection->epoch_lock);
2221 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002222 atomic_inc(&peer_req->epoch->epoch_size);
2223 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002224 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002225
Philipp Reisner302bdea2011-04-21 11:36:49 +02002226 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002227 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002228 rcu_read_unlock();
2229 if (tp) {
2230 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002231 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002232 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002233 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002234 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002235 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002236 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002237 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002238 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002239 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002240 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002241 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002242 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002243 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002244 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002245 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002246 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002247 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002248 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002249 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002250
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002251 if (device->state.conn == C_SYNC_TARGET)
2252 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002253
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002254 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002255 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002256 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002257 case DRBD_PROT_C:
2258 dp_flags |= DP_SEND_WRITE_ACK;
2259 break;
2260 case DRBD_PROT_B:
2261 dp_flags |= DP_SEND_RECEIVE_ACK;
2262 break;
2263 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002264 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002265 }
2266
2267 if (dp_flags & DP_SEND_WRITE_ACK) {
2268 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002269 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002270 /* corresponding dec_unacked() in e_end_block()
2271 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002272 }
2273
2274 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002275 /* I really don't like it that the receiver thread
2276 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002277 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002278 }
2279
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002280 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002281 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002282 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002283 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2284 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002285 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002286 }
2287
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002288 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002289 if (!err)
2290 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002291
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002292 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002293 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002294 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002295 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002296 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002297 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002298 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002299 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002300
Philipp Reisnerb411b362009-09-25 16:07:19 -07002301out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002302 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002303 put_ldev(device);
2304 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002305 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002306}
2307
/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
 * (more than 64 sectors) of activity we cannot account for with our own resync
 * activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 *
 * Returns 1 if the resync request for @sector should be throttled,
 * 0 otherwise.
 */
int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;
	unsigned int c_min_rate;

	/* disk_conf is RCU-protected; we only need the current c_min_rate */
	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return 0;

	/* If application IO already waits for this resync extent
	 * (BME_PRIORITY), never delay it further by throttling. */
	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&device->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&device->al_lock);

	/* total read+write sectors on the backing device, minus the
	 * resync traffic we generated ourselves (rs_sect_ev): what is
	 * left is activity we cannot account for, i.e. "other" IO */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&device->rs_sect_ev);

	/* Only re-evaluate the sync rate if there was "significant"
	 * (> 64 sectors) unaccounted activity since the last sample,
	 * or if we have no baseline yet. */
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* online verify tracks progress in ov_left,
		 * resync in the bitmap weight minus failed blocks */
		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		/* dt: seconds since mark i (at least 1, avoid div by zero);
		 * db: bits synced since that mark */
		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		/* throttle only while we are faster than the configured minimum */
		if (dbdt > c_min_rate)
			throttle = 1;
	}
	return throttle;
}
2378
2379
/* Handle an incoming read-type request from the peer: P_DATA_REQUEST
 * (application read), P_RS_DATA_REQUEST (resync), P_CSUM_RS_REQUEST
 * (checksum-based resync), P_OV_REQUEST / P_OV_REPLY (online verify).
 *
 * Validates the request, allocates a peer request, optionally throttles
 * resync reads, queues the request on device->read_ee and submits the
 * local read.  The per-command completion callback (peer_req->w.cb)
 * sends the reply once the read finishes.
 *
 * Returns 0 on success; a negative error causes a re-connect.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	/* sanity check the peer-supplied request:
	 * positive, 512-byte aligned, bounded, and within the device */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	/* No usable local data: answer with the matching negative ack
	 * and drain any payload still on the wire. */
	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	/* pick completion callback and fault-injection type per command */
	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		/* both carry a digest as payload; receive it into di */
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		/* digest data lives directly behind the digest_info header */
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* first P_OV_REQUEST of a verify run (ov_start_sector still
		 * unset): initialize the online-verify bookkeeping */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	/* account resync sector events for drbd_rs_should_slow_down() */
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	inc_unacked(device);
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2569
/**
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 * @peer_device: peer we split-brained with
 *
 * Applies the configured after-sb-0pri policy.  Return value follows the
 * sync-decision convention documented above drbd_uuid_compare():
 * 1 = we become sync source, -1 = we become sync target, -100 = no
 * automatic decision (disconnect).
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* low bit of the bitmap UUID on each side; used below to pick the
	 * younger/older primary.  NOTE(review): exact semantics of this bit
	 * are defined by the drbd uuid scheme elsewhere -- confirm there. */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* amount of changed data on each side, for the least-changes policies */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies require a remaining primary; invalid here */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		/* same test as above, opposite verdict */
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* deliberate fall through to the change-count based policies */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* neither side changed anything: break the tie via
			 * the RESOLVE_CONFLICTS flag */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* both sides changed data: zero-chg alone cannot decide */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
		/* fall through (younger/older fallback): use least-changes */
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
2650
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002651/**
2652 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2653 */
2654static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002655{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002656 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002657 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002658 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002659
Philipp Reisner44ed1672011-04-19 17:10:19 +02002660 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002661 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002662 rcu_read_unlock();
2663 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002664 case ASB_DISCARD_YOUNGER_PRI:
2665 case ASB_DISCARD_OLDER_PRI:
2666 case ASB_DISCARD_LEAST_CHG:
2667 case ASB_DISCARD_LOCAL:
2668 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002669 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002670 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002671 break;
2672 case ASB_DISCONNECT:
2673 break;
2674 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002675 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002676 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002677 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002678 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002679 rv = hg;
2680 break;
2681 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002682 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002683 break;
2684 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002685 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002686 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002687 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002688 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002689 enum drbd_state_rv rv2;
2690
Philipp Reisnerb411b362009-09-25 16:07:19 -07002691 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2692 * we might be here in C_WF_REPORT_PARAMS which is transient.
2693 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002694 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002695 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002696 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002697 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002698 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002699 rv = hg;
2700 }
2701 } else
2702 rv = hg;
2703 }
2704
2705 return rv;
2706}
2707
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002708/**
2709 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2710 */
2711static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002712{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002713 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002714 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002715 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002716
Philipp Reisner44ed1672011-04-19 17:10:19 +02002717 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002718 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002719 rcu_read_unlock();
2720 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002721 case ASB_DISCARD_YOUNGER_PRI:
2722 case ASB_DISCARD_OLDER_PRI:
2723 case ASB_DISCARD_LEAST_CHG:
2724 case ASB_DISCARD_LOCAL:
2725 case ASB_DISCARD_REMOTE:
2726 case ASB_CONSENSUS:
2727 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002728 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002729 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002730 break;
2731 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002732 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002733 break;
2734 case ASB_DISCONNECT:
2735 break;
2736 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002737 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002738 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002739 enum drbd_state_rv rv2;
2740
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2742 * we might be here in C_WF_REPORT_PARAMS which is transient.
2743 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002744 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002745 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002746 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002747 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002748 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002749 rv = hg;
2750 }
2751 } else
2752 rv = hg;
2753 }
2754
2755 return rv;
2756}
2757
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002758static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002759 u64 bits, u64 flags)
2760{
2761 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002762 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002763 return;
2764 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002765 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002766 text,
2767 (unsigned long long)uuid[UI_CURRENT],
2768 (unsigned long long)uuid[UI_BITMAP],
2769 (unsigned long long)uuid[UI_HISTORY_START],
2770 (unsigned long long)uuid[UI_HISTORY_END],
2771 (unsigned long long)bits,
2772 (unsigned long long)flags);
2773}
2774
2775/*
2776 100 after split brain try auto recover
2777 2 C_SYNC_SOURCE set BitMap
2778 1 C_SYNC_SOURCE use BitMap
2779 0 no Sync
2780 -1 C_SYNC_TARGET use BitMap
2781 -2 C_SYNC_TARGET set BitMap
2782 -100 after split brain, disconnect
2783-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002784-1091 requires proto 91
2785-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002786 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002787static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002788{
2789 u64 self, peer;
2790 int i, j;
2791
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002792 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2793 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002794
2795 *rule_nr = 10;
2796 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2797 return 0;
2798
2799 *rule_nr = 20;
2800 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2801 peer != UUID_JUST_CREATED)
2802 return -2;
2803
2804 *rule_nr = 30;
2805 if (self != UUID_JUST_CREATED &&
2806 (peer == UUID_JUST_CREATED || peer == (u64)0))
2807 return 2;
2808
2809 if (self == peer) {
2810 int rct, dc; /* roles at crash time */
2811
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002812 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002813
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002814 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002815 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002816
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002817 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2818 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002819 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002820 drbd_uuid_move_history(device);
2821 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2822 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002824 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2825 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002826 *rule_nr = 34;
2827 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002828 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002829 *rule_nr = 36;
2830 }
2831
2832 return 1;
2833 }
2834
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002835 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002837 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002838 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002839
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002840 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2841 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002842 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002843
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002844 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2845 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2846 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002847
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002848 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002849 *rule_nr = 35;
2850 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002851 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002852 *rule_nr = 37;
2853 }
2854
2855 return -1;
2856 }
2857
2858 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002859 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2860 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002861 /* lowest bit is set when we were primary,
2862 * next bit (weight 2) is set when peer was primary */
2863 *rule_nr = 40;
2864
2865 switch (rct) {
2866 case 0: /* !self_pri && !peer_pri */ return 0;
2867 case 1: /* self_pri && !peer_pri */ return 1;
2868 case 2: /* !self_pri && peer_pri */ return -1;
2869 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002870 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871 return dc ? -1 : 1;
2872 }
2873 }
2874
2875 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002876 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002877 if (self == peer)
2878 return -1;
2879
2880 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002881 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002882 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002883 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002884 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2885 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2886 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887 /* The last P_SYNC_UUID did not get though. Undo the last start of
2888 resync as sync source modifications of the peer's UUIDs. */
2889
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002890 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002891 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002892
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002893 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2894 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002895
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002896 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002897 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002898
Philipp Reisnerb411b362009-09-25 16:07:19 -07002899 return -1;
2900 }
2901 }
2902
2903 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002904 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002905 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002906 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002907 if (self == peer)
2908 return -2;
2909 }
2910
2911 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002912 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2913 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002914 if (self == peer)
2915 return 1;
2916
2917 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002918 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002920 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002921 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2922 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2923 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002924 /* The last P_SYNC_UUID did not get though. Undo the last start of
2925 resync as sync source modifications of our UUIDs. */
2926
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002927 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002928 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002930 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2931 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002932
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002933 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002934 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2935 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002936
2937 return 1;
2938 }
2939 }
2940
2941
2942 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002943 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002944 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002945 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002946 if (self == peer)
2947 return 2;
2948 }
2949
2950 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002951 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2952 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002953 if (self == peer && self != ((u64)0))
2954 return 100;
2955
2956 *rule_nr = 100;
2957 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002958 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002959 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002960 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002961 if (self == peer)
2962 return -100;
2963 }
2964 }
2965
2966 return -1000;
2967}
2968
2969/* drbd_sync_handshake() returns the new conn state on success, or
2970 CONN_MASK (-1) on failure.
2971 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002972static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
2973 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002974 enum drbd_disk_state peer_disk) __must_hold(local)
2975{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002976 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977 enum drbd_conns rv = C_MASK;
2978 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002979 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002980 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002981
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002982 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002983 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002984 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002985
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002986 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002987
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002988 spin_lock_irq(&device->ldev->md.uuid_lock);
2989 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
2990 drbd_uuid_dump(device, "peer", device->p_uuid,
2991 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002992
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002993 hg = drbd_uuid_compare(device, &rule_nr);
2994 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002995
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002996 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002997
2998 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002999 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003000 return C_MASK;
3001 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003002 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003003 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003004 return C_MASK;
3005 }
3006
3007 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3008 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3009 int f = (hg == -100) || abs(hg) == 2;
3010 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3011 if (f)
3012 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003013 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003014 hg > 0 ? "source" : "target");
3015 }
3016
Adam Gandelman3a11a482010-04-08 16:48:23 -07003017 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003018 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003019
Philipp Reisner44ed1672011-04-19 17:10:19 +02003020 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003021 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003022
3023 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003024 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003025 + (peer_role == R_PRIMARY);
3026 int forced = (hg == -100);
3027
3028 switch (pcount) {
3029 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003030 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031 break;
3032 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003033 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003034 break;
3035 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003036 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003037 break;
3038 }
3039 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003040 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003041 "automatically solved. Sync from %s node\n",
3042 pcount, (hg < 0) ? "peer" : "this");
3043 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003044 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003045 " UUIDs where ambiguous.\n");
3046 hg = hg*2;
3047 }
3048 }
3049 }
3050
3051 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003052 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003054 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055 hg = 1;
3056
3057 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003058 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003059 "Sync from %s node\n",
3060 (hg < 0) ? "peer" : "this");
3061 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003062 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003063 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003064 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003065
3066 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003067 /* FIXME this log message is not correct if we end up here
3068 * after an attempted attach on a diskless node.
3069 * We just refuse to attach -- well, we drop the "connection"
3070 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003071 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003072 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003073 return C_MASK;
3074 }
3075
3076 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003077 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003078 return C_MASK;
3079 }
3080
3081 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003082 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003083 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003084 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003085 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086 /* fall through */
3087 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003088 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 return C_MASK;
3090 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003091 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003092 "assumption\n");
3093 }
3094 }
3095
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003096 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003097 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003098 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003099 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003100 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003101 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3102 abs(hg) >= 2 ? "full" : "bit-map based");
3103 return C_MASK;
3104 }
3105
Philipp Reisnerb411b362009-09-25 16:07:19 -07003106 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003107 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003108 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003109 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003110 return C_MASK;
3111 }
3112
3113 if (hg > 0) { /* become sync source. */
3114 rv = C_WF_BITMAP_S;
3115 } else if (hg < 0) { /* become sync target */
3116 rv = C_WF_BITMAP_T;
3117 } else {
3118 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003119 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003120 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003121 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003122 }
3123 }
3124
3125 return rv;
3126}
3127
Philipp Reisnerf179d762011-05-16 17:31:47 +02003128static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003129{
3130 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003131 if (peer == ASB_DISCARD_REMOTE)
3132 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003133
3134 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003135 if (peer == ASB_DISCARD_LOCAL)
3136 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137
3138 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003139 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140}
3141
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003142static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003143{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003144 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003145 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3146 int p_proto, p_discard_my_data, p_two_primaries, cf;
3147 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3148 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003149 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003150 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151
Philipp Reisnerb411b362009-09-25 16:07:19 -07003152 p_proto = be32_to_cpu(p->protocol);
3153 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3154 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3155 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003156 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003157 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003158 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003159
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003160 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003161 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003162
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003163 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003164 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003165 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003166 if (err)
3167 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003168 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003169 }
3170
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003171 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003172 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003173
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003174 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003175 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003176
3177 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003178 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003179
3180 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003181 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003182 goto disconnect_rcu_unlock;
3183 }
3184
3185 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003186 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003187 goto disconnect_rcu_unlock;
3188 }
3189
3190 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003191 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003192 goto disconnect_rcu_unlock;
3193 }
3194
3195 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003196 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003197 goto disconnect_rcu_unlock;
3198 }
3199
3200 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003201 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003202 goto disconnect_rcu_unlock;
3203 }
3204
3205 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003206 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003207 goto disconnect_rcu_unlock;
3208 }
3209
3210 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003211 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003212 goto disconnect_rcu_unlock;
3213 }
3214
3215 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003216 }
3217
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003218 if (integrity_alg[0]) {
3219 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003220
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003221 /*
3222 * We can only change the peer data integrity algorithm
3223 * here. Changing our own data integrity algorithm
3224 * requires that we send a P_PROTOCOL_UPDATE packet at
3225 * the same time; otherwise, the peer has no way to
3226 * tell between which packets the algorithm should
3227 * change.
3228 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003229
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003230 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3231 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003232 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003233 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003234 goto disconnect;
3235 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003236
3237 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3238 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3239 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3240 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003241 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003242 goto disconnect;
3243 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003244 }
3245
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003246 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3247 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003248 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003249 goto disconnect;
3250 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003251
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003252 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003253 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003254 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003255 *new_net_conf = *old_net_conf;
3256
3257 new_net_conf->wire_protocol = p_proto;
3258 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3259 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3260 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3261 new_net_conf->two_primaries = p_two_primaries;
3262
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003263 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003264 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003265 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003266
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003267 crypto_free_hash(connection->peer_integrity_tfm);
3268 kfree(connection->int_dig_in);
3269 kfree(connection->int_dig_vv);
3270 connection->peer_integrity_tfm = peer_integrity_tfm;
3271 connection->int_dig_in = int_dig_in;
3272 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003273
3274 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003275 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003276 integrity_alg[0] ? integrity_alg : "(none)");
3277
3278 synchronize_rcu();
3279 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003280 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281
Philipp Reisner44ed1672011-04-19 17:10:19 +02003282disconnect_rcu_unlock:
3283 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003284disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003285 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003286 kfree(int_dig_in);
3287 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003288 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003289 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003290}
3291
3292/* helper function
3293 * input: alg name, feature name
3294 * return: NULL (alg name was "")
3295 * ERR_PTR(error) if something goes wrong
3296 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303297static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003298struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003299 const char *alg, const char *name)
3300{
3301 struct crypto_hash *tfm;
3302
3303 if (!alg[0])
3304 return NULL;
3305
3306 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3307 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003308 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 alg, name, PTR_ERR(tfm));
3310 return tfm;
3311 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003312 return tfm;
3313}
3314
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003315static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003316{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003317 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003318 int size = pi->size;
3319
3320 while (size) {
3321 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003322 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003323 if (s <= 0) {
3324 if (s < 0)
3325 return s;
3326 break;
3327 }
3328 size -= s;
3329 }
3330 if (size)
3331 return -EIO;
3332 return 0;
3333}
3334
/*
 * config_unknown_volume - device configuration command for unknown volume
 *
 * When a device is added to an existing connection, the node on which the
 * device is added first will send configuration commands to its peer but the
 * peer will not know about the device yet. It will warn and ignore these
 * commands. Once the device is added on the second node, the second node will
 * send the same device configuration commands, but in the other direction.
 *
 * (We can also end up here if drbd is misconfigured.)
 *
 * The packet's remaining payload is received and discarded so that the data
 * stream stays in sync; the return value is that of ignore_remaining_packet().
 */
static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
{
	drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
		  cmdname(pi->cmd), pi->vnr);
	return ignore_remaining_packet(connection, pi);
}
3352
/*
 * receive_SyncParam() - handle a SyncParam packet from the peer
 *
 * Parses the resync-rate, the verify-alg/csums-alg digest names and (apv > 94)
 * the dynamic resync controller settings.  The expected packet layout depends
 * on the agreed protocol version (apv).  New net_conf / disk_conf / fifo plan
 * objects are built, then published via rcu_assign_pointer() under
 * resource->conf_update; the old objects are freed after synchronize_rcu().
 *
 * Returns 0 on success, -ENOMEM/-EIO on failure.  The "reconnect" path only
 * drops the connection state implicitly via the error return; the
 * "disconnect" path additionally forces C_DISCONNECTING.
 */
static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_param_95 *p;
	unsigned int header_size, data_size, exp_max_sz;
	struct crypto_hash *verify_tfm = NULL;
	struct crypto_hash *csums_tfm = NULL;
	struct net_conf *old_net_conf, *new_net_conf = NULL;
	struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
	const int apv = connection->agreed_pro_version;
	struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
	int fifo_size = 0;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	/* Upper bound of the on-wire size for this protocol version; apv 88
	 * may append a variable-length NUL-terminated verify-alg string. */
	exp_max_sz  = apv <= 87 ? sizeof(struct p_rs_param)
		    : apv == 88 ? sizeof(struct p_rs_param)
					+ SHARED_SECRET_MAX
		    : apv <= 94 ? sizeof(struct p_rs_param_89)
		    : /* apv >= 95 */ sizeof(struct p_rs_param_95);

	if (pi->size > exp_max_sz) {
		drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
		    pi->size, exp_max_sz);
		return -EIO;
	}

	if (apv <= 88) {
		header_size = sizeof(struct p_rs_param);
		data_size = pi->size - header_size;
	} else if (apv <= 94) {
		header_size = sizeof(struct p_rs_param_89);
		data_size = pi->size - header_size;
		/* apv >= 89: alg names are fixed-size fields, no extra data */
		D_ASSERT(device, data_size == 0);
	} else {
		header_size = sizeof(struct p_rs_param_95);
		data_size = pi->size - header_size;
		D_ASSERT(device, data_size == 0);
	}

	/* initialize verify_alg and csums_alg */
	p = pi->data;
	memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);

	err = drbd_recv_all(peer_device->connection, p, header_size);
	if (err)
		return err;

	/* Serializes against concurrent configuration changes; held until one
	 * of the exit paths below unlocks it. */
	mutex_lock(&connection->resource->conf_update);
	old_net_conf = peer_device->connection->net_conf;
	if (get_ldev(device)) {
		new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
		if (!new_disk_conf) {
			put_ldev(device);
			mutex_unlock(&connection->resource->conf_update);
			drbd_err(device, "Allocation of new disk_conf failed\n");
			return -ENOMEM;
		}

		old_disk_conf = device->ldev->disk_conf;
		*new_disk_conf = *old_disk_conf;

		new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
	}

	if (apv >= 88) {
		if (apv == 88) {
			if (data_size > SHARED_SECRET_MAX || data_size == 0) {
				drbd_err(device, "verify-alg of wrong size, "
					"peer wants %u, accepting only up to %u byte\n",
					data_size, SHARED_SECRET_MAX);
				err = -EIO;
				goto reconnect;
			}

			/* apv 88: the verify-alg name follows the header */
			err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
			if (err)
				goto reconnect;
			/* we expect NUL terminated string */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[data_size-1] == 0);
			p->verify_alg[data_size-1] = 0;

		} else /* apv >= 89 */ {
			/* we still expect NUL terminated strings */
			/* but just in case someone tries to be evil */
			D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
			D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
			p->verify_alg[SHARED_SECRET_MAX-1] = 0;
			p->csums_alg[SHARED_SECRET_MAX-1] = 0;
		}

		/* Algorithm changes are only allowed while connecting; a
		 * change during C_WF_REPORT_PARAMS means both sides disagree. */
		if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
				    old_net_conf->verify_alg, p->verify_alg);
				goto disconnect;
			}
			verify_tfm = drbd_crypto_alloc_digest_safe(device,
					p->verify_alg, "verify-alg");
			if (IS_ERR(verify_tfm)) {
				verify_tfm = NULL;
				goto disconnect;
			}
		}

		if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
			if (device->state.conn == C_WF_REPORT_PARAMS) {
				drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
				    old_net_conf->csums_alg, p->csums_alg);
				goto disconnect;
			}
			csums_tfm = drbd_crypto_alloc_digest_safe(device,
					p->csums_alg, "csums-alg");
			if (IS_ERR(csums_tfm)) {
				csums_tfm = NULL;
				goto disconnect;
			}
		}

		/* apv > 94: dynamic resync controller parameters */
		if (apv > 94 && new_disk_conf) {
			new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
			new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
			new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
			new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);

			fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
			if (fifo_size != device->rs_plan_s->size) {
				new_plan = fifo_alloc(fifo_size);
				if (!new_plan) {
					drbd_err(device, "kmalloc of fifo_buffer failed");
					put_ldev(device);
					goto disconnect;
				}
			}
		}

		if (verify_tfm || csums_tfm) {
			new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
			if (!new_net_conf) {
				drbd_err(device, "Allocation of new net_conf failed\n");
				goto disconnect;
			}

			*new_net_conf = *old_net_conf;

			if (verify_tfm) {
				strcpy(new_net_conf->verify_alg, p->verify_alg);
				new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
				crypto_free_hash(peer_device->connection->verify_tfm);
				peer_device->connection->verify_tfm = verify_tfm;
				drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
			}
			if (csums_tfm) {
				strcpy(new_net_conf->csums_alg, p->csums_alg);
				new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
				crypto_free_hash(peer_device->connection->csums_tfm);
				peer_device->connection->csums_tfm = csums_tfm;
				drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
			}
			rcu_assign_pointer(connection->net_conf, new_net_conf);
		}
	}

	/* Publish the new config objects; old ones are freed after the
	 * grace period below. */
	if (new_disk_conf) {
		rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
		put_ldev(device);
	}

	if (new_plan) {
		old_plan = device->rs_plan_s;
		rcu_assign_pointer(device->rs_plan_s, new_plan);
	}

	mutex_unlock(&connection->resource->conf_update);
	synchronize_rcu();
	if (new_net_conf)
		kfree(old_net_conf);
	kfree(old_disk_conf);
	kfree(old_plan);

	return 0;

reconnect:
	/* receive error: undo the tentative disk_conf allocation only */
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	return -EIO;

disconnect:
	/* config conflict or allocation failure: tear the connection down */
	kfree(new_plan);
	if (new_disk_conf) {
		put_ldev(device);
		kfree(new_disk_conf);
	}
	mutex_unlock(&connection->resource->conf_update);
	/* just for completeness: actually not needed,
	 * as this is not reached if csums_tfm was ok. */
	crypto_free_hash(csums_tfm);
	/* but free the verify_tfm again, if csums_tfm did not work out */
	crypto_free_hash(verify_tfm);
	conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
	return -EIO;
}
3564
Philipp Reisnerb411b362009-09-25 16:07:19 -07003565/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003566static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003567 const char *s, sector_t a, sector_t b)
3568{
3569 sector_t d;
3570 if (a == 0 || b == 0)
3571 return;
3572 d = (a > b) ? (a - b) : (b - a);
3573 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003574 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003575 (unsigned long long)a, (unsigned long long)b);
3576}
3577
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003578static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003579{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003580 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003581 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003582 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003583 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003584 sector_t p_size, p_usize, my_usize;
3585 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003586 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003587
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003588 peer_device = conn_peer_device(connection, pi->vnr);
3589 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003590 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003591 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003592
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593 p_size = be64_to_cpu(p->d_size);
3594 p_usize = be64_to_cpu(p->u_size);
3595
Philipp Reisnerb411b362009-09-25 16:07:19 -07003596 /* just store the peer's disk size for now.
3597 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003598 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003599
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003600 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003601 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003602 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003603 rcu_read_unlock();
3604
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003605 warn_if_differ_considerably(device, "lower level device sizes",
3606 p_size, drbd_get_max_capacity(device->ldev));
3607 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003608 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003609
3610 /* if this is the first connect, or an otherwise expected
3611 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003612 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003613 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003614
3615 /* Never shrink a device with usable data during connect.
3616 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003617 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3618 drbd_get_capacity(device->this_bdev) &&
3619 device->state.disk >= D_OUTDATED &&
3620 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003621 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003622 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003623 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003624 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003625 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003626
3627 if (my_usize != p_usize) {
3628 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3629
3630 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3631 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003632 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003633 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003634 return -ENOMEM;
3635 }
3636
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003637 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003638 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003639 *new_disk_conf = *old_disk_conf;
3640 new_disk_conf->disk_size = p_usize;
3641
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003642 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003643 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003644 synchronize_rcu();
3645 kfree(old_disk_conf);
3646
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003647 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003648 (unsigned long)my_usize);
3649 }
3650
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003651 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003652 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653
Philipp Reisnere89b5912010-03-24 17:11:33 +01003654 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003655 if (get_ldev(device)) {
3656 dd = drbd_determine_dev_size(device, ddsf, NULL);
3657 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003658 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003659 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003660 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 } else {
3662 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003663 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003664 }
3665
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003666 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3667 drbd_reconsider_max_bio_size(device);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003668
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003669 if (get_ldev(device)) {
3670 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3671 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672 ldsc = 1;
3673 }
3674
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003675 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676 }
3677
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003678 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003679 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003680 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681 /* we have different sizes, probably peer
3682 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003683 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003684 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003685 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3686 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3687 if (device->state.pdsk >= D_INCONSISTENT &&
3688 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003689 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003690 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003691 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003692 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003693 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003694 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003695 }
3696 }
3697
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003698 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699}
3700
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003701static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003702{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003703 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003704 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003705 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003706 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003707 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003708
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003709 peer_device = conn_peer_device(connection, pi->vnr);
3710 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003711 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003712 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003713
Philipp Reisnerb411b362009-09-25 16:07:19 -07003714 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003715 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003716 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003717 return false;
3718 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003719
3720 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3721 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3722
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003723 kfree(device->p_uuid);
3724 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003725
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003726 if (device->state.conn < C_CONNECTED &&
3727 device->state.disk < D_INCONSISTENT &&
3728 device->state.role == R_PRIMARY &&
3729 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003730 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003731 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003732 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003733 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003734 }
3735
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003736 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003737 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003738 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003739 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003740 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741 (p_uuid[UI_FLAGS] & 8);
3742 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003743 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003744 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003745 "clear_n_write from receive_uuids",
3746 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003747 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3748 _drbd_uuid_set(device, UI_BITMAP, 0);
3749 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003750 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003751 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003752 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003753 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003754 put_ldev(device);
3755 } else if (device->state.disk < D_INCONSISTENT &&
3756 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003757 /* I am a diskless primary, the peer just created a new current UUID
3758 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003759 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760 }
3761
3762 /* Before we test for the disk state, we should wait until an eventually
3763 ongoing cluster wide state change is finished. That is important if
3764 we are primary and are detaching from our disk. We need to see the
3765 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003766 mutex_lock(device->state_mutex);
3767 mutex_unlock(device->state_mutex);
3768 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3769 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003770
3771 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003772 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003773
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003774 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003775}
3776
3777/**
3778 * convert_state() - Converts the peer's view of the cluster state to our point of view
3779 * @ps: The state as seen by the peer.
3780 */
3781static union drbd_state convert_state(union drbd_state ps)
3782{
3783 union drbd_state ms;
3784
3785 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003786 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003787 [C_CONNECTED] = C_CONNECTED,
3788
3789 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3790 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3791 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3792 [C_VERIFY_S] = C_VERIFY_T,
3793 [C_MASK] = C_MASK,
3794 };
3795
3796 ms.i = ps.i;
3797
3798 ms.conn = c_tab[ps.conn];
3799 ms.peer = ps.role;
3800 ms.role = ps.peer;
3801 ms.pdsk = ps.disk;
3802 ms.disk = ps.pdsk;
3803 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3804
3805 return ms;
3806}
3807
/*
 * receive_req_state() - handle a device state-change request from the peer
 *
 * Translates the peer's mask/val pair into our point of view, applies the
 * change via drbd_change_state(), and sends the result code back.  When we
 * are the conflict-resolving side and a local state change is already in
 * flight (state_mutex held), the request is refused with
 * SS_CONCURRENT_ST_CHG instead.  Always returns 0 except for an unknown
 * volume (-EIO).
 */
static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* Avoid deadlock with a concurrent local state change when we are
	 * the side that resolves such conflicts. */
	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* The request is phrased from the peer's perspective; flip it. */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	drbd_md_sync(device);

	return 0;
}
3840
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003841static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003842{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003843 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003844 union drbd_state mask, val;
3845 enum drbd_state_rv rv;
3846
3847 mask.i = be32_to_cpu(p->mask);
3848 val.i = be32_to_cpu(p->val);
3849
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003850 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3851 mutex_is_locked(&connection->cstate_mutex)) {
3852 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003853 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003854 }
3855
3856 mask = convert_state(mask);
3857 val = convert_state(val);
3858
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003859 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3860 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003861
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003862 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003863}
3864
/*
 * Process a state report from the peer: reconcile the reported state with
 * our local view, decide whether a resync handshake is needed, and commit
 * the combined state under the resource request lock.
 *
 * Returns 0 on success, a negative error code when the connection must be
 * torn down.
 */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		/* state report for a volume we do not have configured */
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		/* While the peer is still attaching its disk, derive its
		 * effective disk state from the peer UUID flags we cached. */
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* snapshot the current state; os is what all decisions below are
	 * based on, ns accumulates the state we want to move to */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* peer throttled itself ahead of us: we become the lagging side */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		if (ns.conn == C_MASK) {
			/* C_MASK: the handshake could not agree on a resync
			 * direction; sort out why and react accordingly */
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	if (os.i != drbd_read_state(device).i)
		goto retry;	/* state changed under us; re-evaluate */
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4042
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004043static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004045 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004046 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004047 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004048
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004049 peer_device = conn_peer_device(connection, pi->vnr);
4050 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004051 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004052 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004053
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004054 wait_event(device->misc_wait,
4055 device->state.conn == C_WF_SYNC_UUID ||
4056 device->state.conn == C_BEHIND ||
4057 device->state.conn < C_CONNECTED ||
4058 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004059
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004060 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004061
Philipp Reisnerb411b362009-09-25 16:07:19 -07004062 /* Here the _drbd_uuid_ functions are right, current should
4063 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004064 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4065 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4066 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004067
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004068 drbd_print_uuids(device, "updated sync uuid");
4069 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004070
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004071 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004072 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004073 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004074
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004075 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004076}
4077
/**
 * receive_bitmap_plain() - receive one chunk of an uncompressed bitmap
 * @peer_device:	peer device the bitmap is received for
 * @size:	payload size announced in the packet header
 * @p:	receive buffer for the bitmap words
 * @c:	bitmap transfer context; offsets are advanced as data is consumed
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
4084static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004085receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004086 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004087{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004088 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004089 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004090 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004091 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004092 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004093 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004094
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004095 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004096 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004097 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004098 }
4099 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004100 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004101 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004102 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004103 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004104
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004105 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004106
4107 c->word_offset += num_words;
4108 c->bit_offset = c->word_offset * BITS_PER_LONG;
4109 if (c->bit_offset > c->bm_bits)
4110 c->bit_offset = c->bm_bits;
4111
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004112 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004113}
4114
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004115static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4116{
4117 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4118}
4119
4120static int dcbp_get_start(struct p_compressed_bm *p)
4121{
4122 return (p->encoding & 0x80) != 0;
4123}
4124
4125static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4126{
4127 return (p->encoding >> 4) & 0x7;
4128}
4129
/**
 * recv_bm_rle_bits() - decode one RLE/VLI-compressed bitmap chunk
 * @peer_device:	peer device the bitmap is received for
 * @p:	compressed bitmap packet
 * @c:	bitmap transfer context; bit/word offsets are advanced
 * @len:	number of bytes of VLI code in @p->code
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* buffered, not yet decoded bits of the stream */
	u64 rl;		/* decoded run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* start bit of the current run */
	unsigned long e;	/* end bit (inclusive) of the current run */
	int toggle = dcbp_get_start(p);	/* whether the current run is of set bits */
	int have;	/* number of valid bits in look_ahead */
	int bits;	/* bits consumed or fetched by the last operation */

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* prime the look-ahead buffer with up to 64 bits */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	/* Runs alternate between clear and set bits (toggle flips each
	 * iteration); only runs of set bits are applied to the bitmap. */
	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		if (have < bits) {
			/* the code claimed more bits than we have buffered:
			 * the stream is corrupt */
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* refill look_ahead back up to 64 valid bits */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	return (s != c->bm_bits);
}
4198
/**
 * decode_bitmap_c() - dispatch decoding of one compressed bitmap packet
 * @peer_device:	peer device the bitmap is received for
 * @p:	compressed bitmap packet
 * @c:	bitmap transfer context
 * @len:	total payload length, including the fixed part of @p
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
4205static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004206decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004207 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004208 struct bm_xfer_ctx *c,
4209 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004211 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004212 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004213
4214 /* other variants had been implemented for evaluation,
4215 * but have been dropped as this one turned out to be "best"
4216 * during all our tests. */
4217
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004218 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4219 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004220 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221}
4222
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004223void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004224 const char *direction, struct bm_xfer_ctx *c)
4225{
4226 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004227 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004228 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4229 unsigned int plain =
4230 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4231 c->bm_words * sizeof(unsigned long);
4232 unsigned int total = c->bytes[0] + c->bytes[1];
4233 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004234
4235 /* total can not be zero. but just in case: */
4236 if (total == 0)
4237 return;
4238
4239 /* don't report if not compressed */
4240 if (total >= plain)
4241 return;
4242
4243 /* total < plain. check for overflow, still */
4244 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4245 : (1000 * total / plain);
4246
4247 if (r > 1000)
4248 r = 1000;
4249
4250 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004251 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004252 "total %u; compression: %u.%u%%\n",
4253 direction,
4254 c->bytes[1], c->packets[1],
4255 c->bytes[0], c->packets[0],
4256 total, r/10, r % 10);
4257}
4258
/* Since we are processing the bitfield from lower addresses to higher,
   it does not matter whether we process it in 32 bit chunks or 64 bit
   chunks as long as it is little endian. (Understand it as byte stream,
   beginning with the lowest byte...) If we used big endian
   we would need to process it from the highest address to the lowest,
   in order to be agnostic to the 32 vs 64 bits issue.

   Returns 0 on success, a negative error code otherwise. */
static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct bm_xfer_ctx c;
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
	/* you are supposed to send additional out-of-sync information
	 * if you actually set bits during this phase */

	c = (struct bm_xfer_ctx) {
		.bm_bits = drbd_bm_bits(device),
		.bm_words = drbd_bm_words(device),
	};

	/* Receive packets until the whole bitmap has been transferred.
	 * Each iteration handles one packet, either plain bitmap words or a
	 * compressed chunk.  The decode helpers return 0 when the transfer
	 * is complete, 1 when more packets are expected, negative on error. */
	for(;;) {
		if (pi->cmd == P_BITMAP)
			err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
		else if (pi->cmd == P_COMPRESSED_BITMAP) {
			/* MAYBE: sanity check that we speak proto >= 90,
			 * and the feature is enabled! */
			struct p_compressed_bm *p = pi->data;

			/* the payload must fit into the receive buffer */
			if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
				drbd_err(device, "ReportCBitmap packet too large\n");
				err = -EIO;
				goto out;
			}
			/* must carry at least one byte of code beyond the
			 * fixed part of the packet */
			if (pi->size <= sizeof(*p)) {
				drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
				err = -EIO;
				goto out;
			}
			err = drbd_recv_all(peer_device->connection, p, pi->size);
			if (err)
			       goto out;
			err = decode_bitmap_c(peer_device, p, &c, pi->size);
		} else {
			drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
			err = -EIO;
			goto out;
		}

		/* transfer statistics: index 1 counts plain packets,
		 * index 0 counts compressed ones */
		c.packets[pi->cmd == P_BITMAP]++;
		c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;

		if (err <= 0) {
			if (err < 0)
				goto out;
			break;	/* done: the whole bitmap was received */
		}
		/* more packets expected: read the next header */
		err = drbd_recv_header(peer_device->connection, pi);
		if (err)
			goto out;
	}

	INFO_bm_xfer_stats(device, "receive", &c);

	if (device->state.conn == C_WF_BITMAP_T) {
		enum drbd_state_rv rv;

		/* as bitmap target, answer with our own bitmap */
		err = drbd_send_bitmap(device);
		if (err)
			goto out;
		/* Omit CS_ORDERED with this state transition to avoid deadlocks. */
		rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		D_ASSERT(device, rv == SS_SUCCESS);
	} else if (device->state.conn != C_WF_BITMAP_S) {
		/* admin may have requested C_DISCONNECTING,
		 * other threads may have noticed network errors */
		drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
		    drbd_conn_str(device->state.conn));
	}
	err = 0;

 out:
	drbd_bm_unlock(device);
	if (!err && device->state.conn == C_WF_BITMAP_S)
		drbd_start_resync(device, C_SYNC_SOURCE);
	return err;
}
4354
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004355static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004357 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004358 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004359
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004360 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361}
4362
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004363static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004364{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004365 /* Make sure we've acked all the TCP data associated
4366 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004367 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004368
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004369 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004370}
4371
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004372static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004373{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004374 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004375 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004376 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004377
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004378 peer_device = conn_peer_device(connection, pi->vnr);
4379 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004380 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004381 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004382
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004383 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004384 case C_WF_SYNC_UUID:
4385 case C_WF_BITMAP_T:
4386 case C_BEHIND:
4387 break;
4388 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004389 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004390 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004391 }
4392
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004393 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004394
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004395 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004396}
4397
/* Dispatch descriptor for one packet type on the data socket,
 * consumed by drbdd(). */
struct data_cmd {
	int expect_payload;	/* nonzero: payload beyond pkt_size is legal */
	size_t pkt_size;	/* fixed sub-header size read before dispatch */
	int (*fn)(struct drbd_connection *, struct packet_info *); /* handler */
};
4403
/* Table mapping each data-socket packet type to { expect_payload,
 * sub-header size, handler }.  Indexed directly by packet code in
 * drbdd(); a zero .fn entry means "unknown packet". */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]        = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]       = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]      = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]        = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
};
4430
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004431static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004433 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004434 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004435 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004436
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004437 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004438 struct data_cmd *cmd;
4439
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004440 drbd_thread_current_set_cpu(&connection->receiver);
4441 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004442 goto err_out;
4443
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004444 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004445 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004446 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004447 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004448 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004449 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004450
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004451 shs = cmd->pkt_size;
4452 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004453 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004454 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004455 goto err_out;
4456 }
4457
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004458 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004459 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004460 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004461 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004462 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004463 }
4464
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004465 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004466 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004467 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004468 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004469 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004470 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004471 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004472 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004473
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004474 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004475 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004476}
4477
/*
 * Tear down a lost connection: stop the asender, free the sockets, run
 * per-volume cleanup (drbd_disconnected) for every peer device, reset
 * epoch bookkeeping, and move the connection state towards
 * C_UNCONNECTED / C_STANDALONE.
 */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	/* nothing to tear down */
	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->asender);
	drbd_free_sock(connection);

	/* drbd_disconnected() may sleep, so take a kref on each device and
	 * drop the RCU read lock around the call, re-acquiring it to
	 * continue the idr iteration. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* a Primary that can no longer reach a peer disk tries to fence it */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	/* admin requested a full disconnect: finish the transition */
	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4530
/*
 * Per-volume cleanup after connection loss, called from conn_disconnect()
 * for each peer device: wait for in-flight peer requests, cancel resync
 * bookkeeping, flush the sender workqueue, clear the transfer log and
 * release pages still referenced by the network stack.  Always returns 0.
 */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* stop the timer, then run its function once synchronously so a
	 * pending resync step is not simply dropped */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* forget everything we knew about the peer's UUIDs */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4614
4615/*
4616 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4617 * we can agree on is stored in agreed_pro_version.
4618 *
4619 * feature flags and the reserved array should be enough room for future
4620 * enhancements of the handshake protocol, and possible plugins...
4621 *
4622 * for now, they are expected to be zero, but ignored.
4623 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004624static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004626 struct drbd_socket *sock;
4627 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004628
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004629 sock = &connection->data;
4630 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004631 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004632 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004633 memset(p, 0, sizeof(*p));
4634 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4635 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004636 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004637}
4638
4639/*
4640 * return values:
4641 * 1 yes, we have a valid connection
4642 * 0 oops, did not work out, please try again
4643 * -1 peer talks different language,
4644 * no point in trying again, please go standalone.
4645 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* send ours first; a send failure is treated as retryable (0) */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* convert in place; the buffer is not reused before the next recv */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* the two advertised ranges must overlap */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version both sides support */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4702
4703#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: shared-secret
 * authentication can never work, so tell the admin how to proceed and
 * return -1 ("auth failed, don't try again"). */
static int drbd_do_auth(struct drbd_connection *connection)
{
	/* fix typo in the log message: "was build" -> "was built" */
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4710#else
4711#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004712
4713/* Return value:
4714 1 - auth succeeded,
4715 0 - failed, try again (network error),
4716 -1 - auth failed, don't try again.
4717*/
4718
/*
 * Challenge/response authentication over the data socket using the
 * configured cram-hmac transform and shared secret:
 *   1. send our random challenge (P_AUTH_CHALLENGE),
 *   2. receive the peer's challenge and send back its HMAC (P_AUTH_RESPONSE),
 *   3. receive the peer's response and compare it against the HMAC of
 *      our own challenge.
 * Return value: 1 auth succeeded, 0 network error (try again),
 * -1 auth failed (don't try again).
 */
static int drbd_do_auth(struct drbd_connection *connection)
{
	struct drbd_socket *sock;
	char my_challenge[CHALLENGE_LEN];  /* 64 Bytes... */
	struct scatterlist sg;
	char *response = NULL;
	char *right_response = NULL;
	char *peers_ch = NULL;
	unsigned int key_len;
	char secret[SHARED_SECRET_MAX]; /* 64 byte */
	unsigned int resp_size;
	struct hash_desc desc;
	struct packet_info pi;
	struct net_conf *nc;
	int err, rv;

	/* FIXME: Put the challenge/response into the preallocated socket buffer.  */

	/* copy the secret out under RCU; net_conf may be replaced concurrently */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	key_len = strlen(nc->shared_secret);
	memcpy(secret, nc->shared_secret, key_len);
	rcu_read_unlock();

	desc.tfm = connection->cram_hmac_tfm;
	desc.flags = 0;

	rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
	if (rv) {
		drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	get_random_bytes(my_challenge, CHALLENGE_LEN);

	sock = &connection->data;
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
				my_challenge, CHALLENGE_LEN);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_CHALLENGE) {
		drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	/* upper bound only; NOTE(review): pi.size == 0 is not rejected here,
	 * leading to a kmalloc(0) below — confirm this is intentional */
	if (pi.size > CHALLENGE_LEN * 2) {
		drbd_err(connection, "expected AuthChallenge payload too big.\n");
		rv = -1;
		goto fail;
	}

	peers_ch = kmalloc(pi.size, GFP_NOIO);
	if (peers_ch == NULL) {
		drbd_err(connection, "kmalloc of peers_ch failed\n");
		rv = -1;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, peers_ch, pi.size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* our response = HMAC(secret, peer's challenge) */
	resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
	response = kmalloc(resp_size, GFP_NOIO);
	if (response == NULL) {
		drbd_err(connection, "kmalloc of response failed\n");
		rv = -1;
		goto fail;
	}

	sg_init_table(&sg, 1);
	sg_set_buf(&sg, peers_ch, pi.size);

	rv = crypto_hash_digest(&desc, &sg, sg.length, response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "expected AuthResponse payload of wrong size\n");
		rv = 0;
		goto fail;
	}

	/* reuse the response buffer for the peer's answer */
	err = drbd_recv_all_warn(connection, response , resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	/* expected answer = HMAC(secret, our challenge) */
	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	/* NOTE(review): memcmp is not constant-time; a timing-safe compare
	 * (e.g. crypto_memneq) may be preferable — verify threat model */
	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	/* kfree(NULL) is a no-op, so unconditional frees are fine here */
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
4880#endif
4881
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02004882int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004883{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004884 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004885 int h;
4886
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004887 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004888
4889 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004890 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004891 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004892 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004893 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004894 }
4895 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004896 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004897 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004898 }
4899 } while (h == 0);
4900
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004901 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004902 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004903
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004904 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004905
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004906 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004907 return 0;
4908}
4909
4910/* ********* acknowledge sender ******** */
4911
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004912static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004914 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004915 int retcode = be32_to_cpu(p->retcode);
4916
4917 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004918 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004919 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004920 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004921 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004922 drbd_set_st_err_str(retcode), retcode);
4923 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004924 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004925
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02004926 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01004927}
4928
/*
 * Handle a per-device state change reply (P_STATE_CHG_REPLY): record
 * success or failure in the device flags and wake up the waiter.
 *
 * Peers with agreed_pro_version < 100 answer connection-wide state change
 * requests with this packet as well; those are forwarded to
 * got_conn_RqSReply() when a connection-wide change is pending.
 */
static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		/* old peer: this is really the answer to a connection-wide
		 * state change request */
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}
4957
/* Answer a peer keep-alive P_PING with a P_PING_ACK. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}
4963
/*
 * The peer answered our keep-alive ping: go back from the (short) ping
 * timeout to the regular idle receive timeout, and wake up anyone
 * waiting for the ack.
 */
static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	/* NOTE(review): net_conf is dereferenced here without rcu_read_lock(),
	 * unlike in drbd_asender() where rcu_dereference() is used — confirm
	 * this plain access is safe. */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}
4973
/*
 * P_RS_IS_IN_SYNC: the peer determined that a resync block is already in
 * sync, so no block data was transferred.  Mark the range in sync and do
 * the resync bookkeeping.  Asserts protocol >= 89.
 */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* bitmap updates need a local-disk reference; skip them if the
	 * local disk is gone */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* blksize >> 9: byte count expressed in 512-byte sectors */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5003
/*
 * Look up the request identified by @id/@sector in the tree @root under
 * the resource's req_lock and apply the request state transition @what.
 *
 * Returns 0 on success, -EIO if no matching request was found
 * (@missing_ok is passed through to find_request(); presumably it only
 * relaxes how a miss is treated/reported there — confirm against
 * find_request()).  If the transition completed the master bio, that
 * completion happens outside the spinlock.
 */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* complete the master bio outside the lock */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5025
/*
 * Handle the positive write acknowledgments (P_RECV_ACK, P_WRITE_ACK,
 * P_RS_WRITE_ACK, P_SUPERSEDED, P_RETRY_WRITE) by translating the packet
 * type into the matching drbd_req_event and applying it to the request.
 */
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* ack for a resync write: there is no drbd_request to
		 * update, only bitmap and resync accounting */
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}
5071
/*
 * P_NEG_ACK: the peer failed to satisfy a write (or resync write)
 * request.  Resync writes only need accounting; application writes are
 * negatively acknowledged to the request state machine, and the range is
 * marked out of sync if the request is already gone.
 */
static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		/* failed resync write: account it and mark the range failed */
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the collision hash. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}
5107
/*
 * P_NEG_DREPLY: the peer could not satisfy a read request.  Log it and
 * negatively acknowledge the corresponding entry in read_requests.
 */
static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}
5129
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005130static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005131{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005132 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005133 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005134 sector_t sector;
5135 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005136 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005137
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005138 peer_device = conn_peer_device(connection, pi->vnr);
5139 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005140 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005141 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142
5143 sector = be64_to_cpu(p->sector);
5144 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005145
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005146 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005147
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005148 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005149
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005150 if (get_ldev_if_state(device, D_FAILED)) {
5151 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005152 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005153 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005154 drbd_rs_failed_io(device, sector, size);
Philipp Reisnerd612d302010-12-27 10:53:28 +01005155 case P_RS_CANCEL:
5156 break;
5157 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005158 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005159 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005160 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005161 }
5162
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005163 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005164}
5165
/*
 * P_BARRIER_ACK: the peer has processed all writes up to the given
 * barrier; release that part of the transfer log.  Additionally, any
 * device that sits in Ahead mode with no application I/O in flight gets
 * its start_resync_timer armed (to move back towards resync), guarded by
 * the AHEAD_TO_SYNC_SOURCE flag so the timer is armed only once.
 */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5189
/*
 * P_OV_RESULT: the peer reports the comparison result for one
 * online-verify block (block_id == ID_OUT_OF_SYNC marks a mismatch).
 * When the last outstanding verify block has been answered
 * (ov_left reaches 0), hand the finish-up work to the worker thread.
 */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	/* the accounting below needs a local-disk reference */
	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			/* could not allocate the work item: finish the
			 * verify run directly in this context instead */
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}
5241
/* Intentionally ignore the packet (used for P_DELAY_PROBE, see asender_tbl). */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
5246
/*
 * Drain the done_ee lists of all devices of this connection by calling
 * drbd_finish_peer_reqs() for each device, repeating until no device has
 * completed peer requests left.
 *
 * Returns 0 once everything was drained, 1 if drbd_finish_peer_reqs()
 * failed for some device.
 */
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			/* pin the device so it survives while we drop the
			 * RCU read lock for the (possibly sleeping) call */
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		/* new completions may have arrived meanwhile: re-check all
		 * done_ee lists under the req_lock and loop if any is
		 * non-empty */
		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}
5283
/* Dispatch table entry for packets received on the meta socket. */
struct asender_cmd {
	size_t pkt_size;	/* expected payload size of this packet type */
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5288
/* Packet type -> handler mapping for the asender (meta socket) thread. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	    = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005308
5309int drbd_asender(struct drbd_thread *thi)
5310{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005311 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005312 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005313 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005314 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005315 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005316 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005317 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005318 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005319 bool ping_timeout_active = false;
5320 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005321 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005322 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005323
Philipp Reisner3990e042013-03-27 14:08:48 +01005324 rv = sched_setscheduler(current, SCHED_RR, &param);
5325 if (rv < 0)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005326 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005327
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005328 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005329 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005330
5331 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005332 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005333 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005334 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005335 ping_int = nc->ping_int;
5336 rcu_read_unlock();
5337
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005338 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5339 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005340 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005341 goto reconnect;
5342 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005343 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005344 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005345 }
5346
Philipp Reisner32862ec2011-02-08 16:41:01 +01005347 /* TODO: conditionally cork; it may hurt latency if we cork without
5348 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005349 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005350 drbd_tcp_cork(connection->meta.socket);
5351 if (connection_finish_peer_reqs(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005352 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005353 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005354 }
5355 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005356 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005357 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005358
5359 /* short circuit, recv_msg would return EINTR anyways. */
5360 if (signal_pending(current))
5361 continue;
5362
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005363 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5364 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005365
5366 flush_signals(current);
5367
5368 /* Note:
5369 * -EINTR (on meta) we got a signal
5370 * -EAGAIN (on meta) rcvtimeo expired
5371 * -ECONNRESET other side closed the connection
5372 * -ERESTARTSYS (on data) we got a signal
5373 * rv < 0 other than above: unexpected error!
5374 * rv == expected: full header or command
5375 * rv < expected: "woken" by signal during receive
5376 * rv == 0 : "connection shut down by peer"
5377 */
5378 if (likely(rv > 0)) {
5379 received += rv;
5380 buf += rv;
5381 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005382 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005383 long t;
5384 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005385 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005386 rcu_read_unlock();
5387
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005388 t = wait_event_timeout(connection->ping_wait,
5389 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005390 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005391 if (t)
5392 break;
5393 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005394 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005395 goto reconnect;
5396 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005397 /* If the data socket received something meanwhile,
5398 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005399 if (time_after(connection->last_received,
5400 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005401 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005402 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005403 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005404 goto reconnect;
5405 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005406 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005407 continue;
5408 } else if (rv == -EINTR) {
5409 continue;
5410 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005411 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005412 goto reconnect;
5413 }
5414
5415 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005416 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005417 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005418 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005419 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005420 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005421 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005422 goto disconnect;
5423 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005424 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005425 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005426 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005427 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005428 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005429 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005430 }
5431 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005432 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005433
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005434 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005435 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005436 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005437 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005438 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005439
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005440 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005441
Philipp Reisner44ed1672011-04-19 17:10:19 +02005442 if (cmd == &asender_tbl[P_PING_ACK]) {
5443 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005444 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005445 ping_timeout_active = false;
5446 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005447
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005448 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005449 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005450 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005451 cmd = NULL;
5452 }
5453 }
5454
5455 if (0) {
5456reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005457 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5458 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005459 }
5460 if (0) {
5461disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005462 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005463 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005464 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005465
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005466 drbd_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005467
5468 return 0;
5469}