/*
   drbd_receiver.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
 */


#include <linux/module.h>

#include <asm/uaccess.h>
#include <net/sock.h>

#include <linux/drbd.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/in.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/pkt_sched.h>
#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/scatterlist.h>
#include "drbd_int.h"
#include "drbd_protocol.h"
#include "drbd_req.h"

#include "drbd_vli.h"

struct packet_info {
	enum drbd_packet cmd;
	unsigned int size;
	unsigned int vnr;
	void *data;
};

enum finish_epoch {
	FE_STILL_LIVE,
	FE_DESTROYED,
	FE_RECYCLED,
};

static int drbd_do_features(struct drbd_connection *connection);
static int drbd_do_auth(struct drbd_connection *connection);
static int drbd_disconnected(struct drbd_peer_device *);

static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
static int e_end_block(struct drbd_work *, int);


#define GFP_TRY	(__GFP_HIGHMEM | __GFP_NOWARN)

/*
 * some helper functions to deal with single linked page lists,
 * page->private being our "next" pointer.
 */

/* If at least n pages are linked at head, get n pages off.
 * Otherwise, don't modify head, and return NULL.
 * Locking is the responsibility of the caller.
 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}

/* may be used outside of locks to find the tail of a (usually short)
 * "private" page chain, before adding it back to a global chain head
 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *tmp;
	int i = 1;
	while ((tmp = page_chain_next(page)))
		++i, page = tmp;
	if (len)
		*len = i;
	return page;
}

static int page_chain_free(struct page *page)
{
	struct page *tmp;
	int i = 0;
	page_chain_for_each_safe(page, tmp) {
		put_page(page);
		++i;
	}
	return i;
}

static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
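
/* Illustration (added, not in the original source): a chain of three pages
 * linked through page->private looks like
 *
 *	*head -> [page A] -> [page B] -> [page C] -> 0
 *
 * where each arrow is a set_page_private(page, (unsigned long)next), and the
 * tail carries 0 as the end-of-list marker, matching what page_chain_del()
 * writes before detaching a sub-chain.
 */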

static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
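
/* Note (added): the unlocked drbd_pp_vacant test above is an optimistic
 * check; a stale read can only cause a harmless miss (we fall through to
 * alloc_page()) or a NULL return from page_chain_del() under the lock,
 * never pool corruption.
 */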

static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}

static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}

/**
 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
 * @peer_device:	DRBD peer device.
 * @number:		number of pages requested
 * @retry:		whether to retry, if not enough pages are available right now
 *
 * Tries to allocate @number pages, first from our own page pool, then from
 * the kernel, unless this allocation would exceed the max_buffers setting.
 * Possibly retry until DRBD frees sufficient pages somewhere else.
 *
 * Returns a page chain linked via page->private.
 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	int mxb;

	/* Yes, we may run up to @number over max_buffers. If we
	 * follow it strictly, the admin will get it wrong anyways. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		schedule();
	}
	finish_wait(&drbd_pp_wait, &wait);

	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
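
/* Usage sketch (illustrative, not part of the original file):
 *
 *	page = drbd_alloc_pages(peer_device, nr_pages, true);
 *	if (page) {
 *		...
 *		drbd_free_pages(device, page, 0);
 *	}
 *
 * @retry == true means: sleep on drbd_pp_wait until other requests free
 * pages, or until a signal arrives.
 */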

/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
 * Is also called from inside another spin_lock_irq(&resource->req_lock);
 * Either links the page chain back to the global pool,
 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	wake_up(&drbd_pp_wait);
}

/*
You need to hold the req_lock:
 _drbd_wait_ee_list_empty()

You must not have the req_lock:
 drbd_free_peer_req()
 drbd_alloc_peer_req()
 drbd_free_peer_reqs()
 drbd_ee_fix_bhs()
 drbd_finish_peer_reqs()
 drbd_clear_done_ee()
 drbd_wait_ee_list_empty()
*/
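
/* Illustrative pattern (added for clarity, not in the original): callers of
 * _drbd_wait_ee_list_empty() follow
 *
 *	spin_lock_irq(&device->resource->req_lock);
 *	_drbd_wait_ee_list_empty(device, &device->active_ee);
 *	spin_unlock_irq(&device->resource->req_lock);
 *
 * while the allocation/free helpers above may sleep and therefore must be
 * called without req_lock held.
 */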

struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	unsigned nr_pages = (data_size + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver.  It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}

void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}

int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}

/*
 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
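
/* Note (added): even after one callback fails, the loop above keeps invoking
 * the remaining callbacks and remembers only the first nonzero return; the
 * !!err passed along is the "cancel" argument of drbd work callbacks, which
 * the three callbacks named above ignore.
 */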

static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}

static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}

static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}

static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;
			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}

static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv(connection, buf, size);
	if (err != size) {
		if (err >= 0)
			err = -EIO;
	} else
		err = 0;
	return err;
}

static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
{
	int err;

	err = drbd_recv_all(connection, buf, size);
	if (err && !signal_pending(current))
		drbd_warn(connection, "short read (expected size %d)\n", (int)size);
	return err;
}

/* quoting tcp(7):
 *   On individual connections, the socket buffer size must be set prior to the
 *   listen(2) or connect(2) calls in order to have it take effect.
 * This is our wrapper to do so.
 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
			    unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
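
/* Ordering note (added): both drbd_try_connect() and prepare_listen_socket()
 * below call drbd_setbufsize() right after sock_create_kern() and before
 * connect()/listen(), which is exactly the window tcp(7) requires for
 * SO_SNDBUF/SO_RCVBUF to take effect.
 */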

static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;
	struct socket *sock;
	struct sockaddr_in6 src_in6;
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
			/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
			/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN:    case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}

struct accept_wait_data {
	struct drbd_connection *connection;
	struct socket *s_listen;
	struct completion door_bell;
	void (*original_sk_state_change)(struct sock *sk);

};

static void drbd_incoming_connection(struct sock *sk)
{
	struct accept_wait_data *ad = sk->sk_user_data;
	void (*state_change)(struct sock *sk);

	state_change = ad->original_sk_state_change;
	if (sk->sk_state == TCP_ESTABLISHED)
		complete(&ad->door_bell);
	state_change(sk);
}

static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int err, sndbuf_size, rcvbuf_size, my_addr_len;
	struct sockaddr_in6 my_addr;
	struct socket *s_listen;
	struct net_conf *nc;
	const char *what;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return -EIO;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
	memcpy(&my_addr, &connection->my_addr, my_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &s_listen);
	if (err) {
		s_listen = NULL;
		goto out;
	}

	s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);

	what = "bind before listen";
	err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
	if (err < 0)
		goto out;

	ad->s_listen = s_listen;
	write_lock_bh(&s_listen->sk->sk_callback_lock);
	ad->original_sk_state_change = s_listen->sk->sk_state_change;
	s_listen->sk->sk_state_change = drbd_incoming_connection;
	s_listen->sk->sk_user_data = ad;
	write_unlock_bh(&s_listen->sk->sk_callback_lock);

	what = "listen";
	err = s_listen->ops->listen(s_listen, 5);
	if (err < 0)
		goto out;

	return 0;
out:
	if (s_listen)
		sock_release(s_listen);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "%s failed, err = %d\n", what, err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	return -EIO;
}

static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
{
	write_lock_bh(&sk->sk_callback_lock);
	sk->sk_state_change = ad->original_sk_state_change;
	sk->sk_user_data = NULL;
	write_unlock_bh(&sk->sk_callback_lock);
}

static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
{
	int timeo, connect_int, err = 0;
	struct socket *s_estab = NULL;
	struct net_conf *nc;

	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	connect_int = nc->connect_int;
	rcu_read_unlock();

	timeo = connect_int * HZ;
	/* 28.5% random jitter */
	timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;

	err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
	if (err <= 0)
		return NULL;

	err = kernel_accept(ad->s_listen, &s_estab, 0);
	if (err < 0) {
		if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
			drbd_err(connection, "accept failed, err = %d\n", err);
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	}

	if (s_estab)
		unregister_state_change(s_estab->sk, ad);

	return s_estab;
}

static int decode_header(struct drbd_connection *, void *, struct packet_info *);

static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
			     enum drbd_packet cmd)
{
	if (!conn_prepare_command(connection, sock))
		return -EIO;
	return conn_send_command(connection, sock, cmd, 0, NULL, 0);
}

static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
{
	unsigned int header_size = drbd_header_size(connection);
	struct packet_info pi;
	int err;

	err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
	if (err != header_size) {
		if (err >= 0)
			err = -EIO;
		return err;
	}
	err = decode_header(connection, connection->data.rbuf, &pi);
	if (err)
		return err;
	return pi.cmd;
}

/**
 * drbd_socket_okay() - Free the socket if its connection is not okay
 * @sock: pointer to the pointer to the socket.
 */
static int drbd_socket_okay(struct socket **sock)
{
	int rr;
	char tb[4];

	if (!*sock)
		return false;

	rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);

	if (rr > 0 || rr == -EAGAIN) {
		return true;
	} else {
		sock_release(*sock);
		*sock = NULL;
		return false;
	}
}
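
/* Probe technique (added note): the MSG_DONTWAIT | MSG_PEEK read above checks
 * liveness without consuming data; a positive return or -EAGAIN means the
 * socket is still usable, while 0 (orderly shutdown) or any other error
 * causes the socket to be released.
 */
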
/* Gets called if a connection is established, or if a new minor gets created
   in a connection */
int drbd_connected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	int err;

	atomic_set(&device->packet_seq, 0);
	device->peer_seq = 0;

	device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
		&peer_device->connection->cstate_mutex :
		&device->own_state_mutex;

	err = drbd_send_sync_param(peer_device);
	if (!err)
		err = drbd_send_sizes(peer_device, 0, 0);
	if (!err)
		err = drbd_send_uuids(peer_device);
	if (!err)
		err = drbd_send_current_state(peer_device);
	clear_bit(USE_DEGR_WFC_T, &device->flags);
	clear_bit(RESIZE_PENDING, &device->flags);
	atomic_set(&device->ap_in_flight, 0);
	mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
	return err;
}

/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	set_bit(STATE_SENT, &connection->flags);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();

		/* Prevent a race between resync-handshake and
		 * being promoted to Primary.
		 *
		 * Grab and release the state mutex, so we know that any current
		 * drbd_set_role() is finished, and any incoming drbd_set_role
		 * will see the STATE_SENT flag, and wait for it to be cleared.
		 */
		mutex_lock(device->state_mutex);
		mutex_unlock(device->state_mutex);

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1085
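/* On-wire header formats, as decode_header() below distinguishes them:
 * protocol 100 peers send struct p_header100 (magic DRBD_MAGIC_100, with a
 * 16-bit volume number), older peers send p_header95 (DRBD_MAGIC_BIG, 32-bit
 * length) or the original p_header80 (DRBD_MAGIC, 16-bit length).  The two
 * older formats predate multiple volumes, so pi->vnr is forced to 0. */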
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001086static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001087{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001088 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001089
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001090 if (header_size == sizeof(struct p_header100) &&
1091 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1092 struct p_header100 *h = header;
1093 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001094 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001095 return -EINVAL;
1096 }
1097 pi->vnr = be16_to_cpu(h->volume);
1098 pi->cmd = be16_to_cpu(h->command);
1099 pi->size = be32_to_cpu(h->length);
1100 } else if (header_size == sizeof(struct p_header95) &&
1101 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001102 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001103 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001104 pi->size = be32_to_cpu(h->length);
1105 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001106 } else if (header_size == sizeof(struct p_header80) &&
1107 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1108 struct p_header80 *h = header;
1109 pi->cmd = be16_to_cpu(h->command);
1110 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001111 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001112 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001113 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001114 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001115 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001116 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001117 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001118 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001119 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001120}
1121
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001122static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001123{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001124 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001125 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001126
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001127 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001128 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001129 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001130
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001131 err = decode_header(connection, buffer, pi);
1132 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001133
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001134 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001135}
1136
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001137static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001138{
1139 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001140 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001141 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001142
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001143 if (connection->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001144 rcu_read_lock();
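		/* blkdev_issue_flush() may block, which is not allowed under
		 * rcu_read_lock(); pin each device with kref_get(), drop the
		 * RCU read lock around the flush, and re-acquire it before
		 * moving on to the next volume. */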
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001145 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1146 struct drbd_device *device = peer_device->device;
1147
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001148 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001149 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001150 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001151 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001152
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001153 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001154 GFP_NOIO, NULL);
1155 if (rv) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001156 drbd_info(device, "local disk flush failed with status %d\n", rv);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001157 /* would rather check on EOPNOTSUPP, but that is not reliable.
1158 * don't try again for ANY return value != 0
1159 * if (rv == -EOPNOTSUPP) */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001160 drbd_bump_write_ordering(connection, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001161 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001162 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001163 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001164
1165 rcu_read_lock();
1166 if (rv)
1167 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001168 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001169 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001170 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001171}
1172
1173/**
1174 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, possibly finishing it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001175 * @connection:	DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001176 * @epoch: Epoch object.
1177 * @ev: Epoch event.
1178 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001179static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001180 struct drbd_epoch *epoch,
1181 enum epoch_event ev)
1182{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001183 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001184 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001185 enum finish_epoch rv = FE_STILL_LIVE;
1186
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001187 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001188 do {
1189 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001190
1191 epoch_size = atomic_read(&epoch->epoch_size);
1192
1193 switch (ev & ~EV_CLEANUP) {
1194 case EV_PUT:
1195 atomic_dec(&epoch->active);
1196 break;
1197 case EV_GOT_BARRIER_NR:
1198 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 break;
1200 case EV_BECAME_LAST:
1201			/* nothing to do */
1202 break;
1203 }
1204
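		/* An epoch may be finished once it has seen at least one write
		 * (epoch_size != 0), none of those writes are still in flight
		 * (active == 0), and either its barrier number is known or we
		 * are cleaning up anyway. */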
Philipp Reisnerb411b362009-09-25 16:07:19 -07001205 if (epoch_size != 0 &&
1206 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001207 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001208 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001209 spin_unlock(&connection->epoch_lock);
1210 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1211 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001212 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001213#if 0
1214 /* FIXME: dec unacked on connection, once we have
1215 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001216 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001217 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001218#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001220 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001221 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1222 list_del(&epoch->list);
1223 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001224 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001225 kfree(epoch);
1226
1227 if (rv == FE_STILL_LIVE)
1228 rv = FE_DESTROYED;
1229 } else {
1230 epoch->flags = 0;
1231 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001232 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001233 if (rv == FE_STILL_LIVE)
1234 rv = FE_RECYCLED;
1235 }
1236 }
1237
1238 if (!next_epoch)
1239 break;
1240
1241 epoch = next_epoch;
1242 } while (1);
1243
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001244 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001245
Philipp Reisnerb411b362009-09-25 16:07:19 -07001246 return rv;
1247}
1248
1249/**
1250 * drbd_bump_write_ordering() - Fall back to another write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001251 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001252 * @wo: Write ordering method to try.
1253 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001254void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001255{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001256 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001257 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001258 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001259 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001260 static char *write_ordering_str[] = {
1261 [WO_none] = "none",
1262 [WO_drain_io] = "drain",
1263 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001264 };
1265
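	/* min(pwo, wo): the write ordering method is only ever downgraded
	 * here, never upgraded; any volume with disk_flushes/disk_drain
	 * disabled can force a further step down for the whole connection. */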
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001266 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001267 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001268 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001269 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1270 struct drbd_device *device = peer_device->device;
1271
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001272 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001273 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001274 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001275
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001276 if (wo == WO_bdev_flush && !dc->disk_flushes)
1277 wo = WO_drain_io;
1278 if (wo == WO_drain_io && !dc->disk_drain)
1279 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001280 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001281 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001282 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001283 connection->write_ordering = wo;
1284 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001285 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001286}
1287
1288/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001289 * drbd_submit_peer_request() - split a peer request into bios and submit them
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001290 * @device:	DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001291 * @peer_req:	peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001292 * @rw:		flag field, see bio->bi_rw
 * @fault_type:	fault injection class, handed on to drbd_generic_make_request()
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001293 *
1294 * May spread the pages to multiple bios,
1295 * depending on bio_add_page restrictions.
1296 *
1297 * Returns 0 if all bios have been submitted,
1298 * -ENOMEM if we could not allocate enough bios,
1299 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1300 * single page to an empty bio (which should never happen and likely indicates
1301 * that the lower level IO stack is in some way broken). This has been observed
1302 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001303 */
1304/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001305int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001306 struct drbd_peer_request *peer_req,
1307 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001308{
1309 struct bio *bios = NULL;
1310 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001311 struct page *page = peer_req->pages;
1312 sector_t sector = peer_req->i.sector;
1313 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001314 unsigned n_bios = 0;
1315 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001316 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001317
1318 /* In most cases, we will only need one bio. But in case the lower
1319 * level restrictions happen to be different at this offset on this
1320 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001321 * request in more than one bio.
1322 *
1323 * Plain bio_alloc is good enough here, this is no DRBD internally
1324 * generated bio, but a bio allocated on behalf of the peer.
1325 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001326next_bio:
1327 bio = bio_alloc(GFP_NOIO, nr_pages);
1328 if (!bio) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001329 drbd_err(device, "submit_ee: Allocation of a bio failed\n");
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001330 goto fail;
1331 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001332 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001333 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001334 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001335 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001336 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001337 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001338
1339 bio->bi_next = bios;
1340 bios = bio;
1341 ++n_bios;
1342
1343 page_chain_for_each(page) {
1344 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1345 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001346 /* A single page must always be possible!
1347 * But in case it fails anyways,
1348 * we deal with it, and complain (below). */
1349 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001350 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001351 "bio_add_page failed for len=%u, "
1352 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001353 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001354 err = -ENOSPC;
1355 goto fail;
1356 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001357 goto next_bio;
1358 }
1359 ds -= len;
1360 sector += len >> 9;
1361 --nr_pages;
1362 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001363 D_ASSERT(device, page == NULL);
1364 D_ASSERT(device, ds == 0);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001365
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001366 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001367 do {
1368 bio = bios;
1369 bios = bios->bi_next;
1370 bio->bi_next = NULL;
1371
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001372 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001373 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001374 return 0;
1375
1376fail:
1377 while (bios) {
1378 bio = bios;
1379 bios = bios->bi_next;
1380 bio_put(bio);
1381 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001382 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001383}
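/* Typical call site, see recv_resync_read() below: a resync write is
 * submitted as drbd_submit_peer_request(device, peer_req, WRITE,
 * DRBD_FAULT_RS_WR). */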
1384
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001385static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001386 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001387{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001388 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001389
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001390 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001391 drbd_clear_interval(i);
1392
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001393 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001394 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001395 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001396}
1397
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001398static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001399{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001400 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001401 int vnr;
1402
1403 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001404 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1405 struct drbd_device *device = peer_device->device;
1406
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001407 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001408 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001409 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001410 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001411 rcu_read_lock();
1412 }
1413 rcu_read_unlock();
1414}
1415
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001416static struct drbd_peer_device *
1417conn_peer_device(struct drbd_connection *connection, int volume_number)
1418{
1419 return idr_find(&connection->peer_devices, volume_number);
1420}
1421
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001422static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001423{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001424 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001425 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001426 struct drbd_epoch *epoch;
1427
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001428 /* FIXME these are unacked on connection,
1429 * not a specific (peer)device.
1430 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001431 connection->current_epoch->barrier_nr = p->barrier;
1432 connection->current_epoch->connection = connection;
1433 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001434
1435 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1436 * the activity log, which means it would not be resynced in case the
1437 * R_PRIMARY crashes now.
1438 * Therefore we must send the barrier_ack after the barrier request was
1439 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001440 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001441 case WO_none:
1442 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001443 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001444
1445 /* receiver context, in the writeout path of the other node.
1446 * avoid potential distributed deadlock */
1447 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1448 if (epoch)
1449 break;
1450 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001451 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001452 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001453
1454 case WO_bdev_flush:
1455 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001456 conn_wait_active_ee_empty(connection);
1457 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001458
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001459 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001460 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1461 if (epoch)
1462 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001463 }
1464
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001465 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001466 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001467 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001468 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001469 }
1470
1471 epoch->flags = 0;
1472 atomic_set(&epoch->epoch_size, 0);
1473 atomic_set(&epoch->active, 0);
1474
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001475 spin_lock(&connection->epoch_lock);
1476 if (atomic_read(&connection->current_epoch->epoch_size)) {
1477 list_add(&epoch->list, &connection->current_epoch->list);
1478 connection->current_epoch = epoch;
1479 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001480 } else {
1481 /* The current_epoch got recycled while we allocated this one... */
1482 kfree(epoch);
1483 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001484 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001485
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001486 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487}
1488
1489/* used from receive_RSDataReply (recv_resync_read)
1490 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001491static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001492read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001493 int data_size) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001494{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001495 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001496 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001497 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001498 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001499 int dgs, ds, err;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001500 void *dig_in = peer_device->connection->int_dig_in;
1501 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001502 unsigned long *data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503
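	/* If a peer data integrity algorithm is configured, the payload is
	 * preceded on the wire by a digest of dgs bytes, and the size from
	 * the packet header still includes it; receive the digest first,
	 * then account for it by shrinking data_size. */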
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001504 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001505 if (peer_device->connection->peer_integrity_tfm) {
1506 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001507 /*
1508 * FIXME: Receive the incoming digest into the receive buffer
1509 * here, together with its struct p_data?
1510 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001511 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001512 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001514 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001515 }
1516
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001517 if (!expect(IS_ALIGNED(data_size, 512)))
1518 return NULL;
1519 if (!expect(data_size <= DRBD_MAX_BIO_SIZE))
1520 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521
Lars Ellenberg66660322010-04-06 12:15:04 +02001522	/* even though we trust our peer,
1523 * we sometimes have to double check. */
1524 if (sector + (data_size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001525 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001526 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001527 (unsigned long long)capacity,
1528 (unsigned long long)sector, data_size);
1529 return NULL;
1530 }
1531
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1533 * "criss-cross" setup, that might cause write-out on some other DRBD,
1534 * which in turn might block on the other node at this very place. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001535 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001536 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001537 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001538
Lars Ellenberga73ff322012-06-25 19:15:38 +02001539 if (!data_size)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001540 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001541
Philipp Reisnerb411b362009-09-25 16:07:19 -07001542 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001543 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001544 page_chain_for_each(page) {
1545 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001546 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001547 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001548 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001549 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001550 data[0] = data[0] ^ (unsigned long)-1;
1551 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001552 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001553 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001554 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001555 return NULL;
1556 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001557 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001558 }
1559
1560 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001561 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001562 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001563 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001564 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001565 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001566 return NULL;
1567 }
1568 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001569 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001570 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001571}
1572
1573/* drbd_drain_block() just takes a data block
1574 * out of the socket input buffer, and discards it.
1575 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001576static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577{
1578 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001579 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001580 void *data;
1581
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001582 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001583 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001584
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001585 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001586
1587 data = kmap(page);
1588 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001589 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1590
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001591 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001592 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001593 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001594 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001595 }
1596 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001597 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001598 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001599}
1600
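/* "dless" presumably stands for disk-less: the peer answered one of our own
 * application reads, so the payload is copied straight into the master bio
 * of the original request instead of being written to the backing device. */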
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001601static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001602 sector_t sector, int data_size)
1603{
Kent Overstreet79886132013-11-23 17:19:00 -08001604 struct bio_vec bvec;
1605 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001606 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001607 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001608 void *dig_in = peer_device->connection->int_dig_in;
1609 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001610
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001611 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001612 if (peer_device->connection->peer_integrity_tfm) {
1613 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1614 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001615 if (err)
1616 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001617 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618 }
1619
Philipp Reisnerb411b362009-09-25 16:07:19 -07001620 /* optimistically update recv_cnt. if receiving fails below,
1621 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001622 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001623
1624 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001625 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001626
Kent Overstreet79886132013-11-23 17:19:00 -08001627 bio_for_each_segment(bvec, bio, iter) {
1628 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1629 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001630 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001631 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001632 if (err)
1633 return err;
1634 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001635 }
1636
1637 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001638 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001639 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001640 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001641 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001642 }
1643 }
1644
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001645 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001646 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001647}
1648
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001649/*
1650 * e_end_resync_block() is called in asender context via
1651 * drbd_finish_peer_reqs().
1652 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001653static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001654{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001655 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001656 container_of(w, struct drbd_peer_request, w);
1657 struct drbd_peer_device *peer_device = peer_req->peer_device;
1658 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001659 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001660 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001662 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001663
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001664 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001665 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001666 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001667 } else {
1668 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001669 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001670
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001671 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001672 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001673 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001674
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001675 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001676}
1677
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001678static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
1679 int data_size) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001681 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001682 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001684 peer_req = read_in_block(peer_device, ID_SYNCER, sector, data_size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001685 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001686 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001687
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001688 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001689
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001690 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691 /* corresponding dec_unacked() in e_end_resync_block()
1692 * respective _drbd_clear_done_ee */
1693
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001694 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001695
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001696 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001697 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001698 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001699
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001700 atomic_add(data_size >> 9, &device->rs_sect_ev);
1701 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001702 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001704 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001705 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001706 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001707 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001708 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001709
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001710 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001711fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001712 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001713 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001714}
1715
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001716static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001717find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001718 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001719{
1720 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001721
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001722	/* Request object according to our peer: the block_id echoed back is
	 * the kernel address of our own request struct, so check that it is
	 * still a live interval in our tree before trusting the pointer. */
1723 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001724 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001725 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001726 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001727 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001728 (unsigned long)id, (unsigned long long)sector);
1729 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001730 return NULL;
1731}
1732
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001733static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001734{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001735 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001736 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001737 struct drbd_request *req;
1738 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001739 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001740 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001741
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001742 peer_device = conn_peer_device(connection, pi->vnr);
1743 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001744 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001745 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001746
1747 sector = be64_to_cpu(p->sector);
1748
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001749 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001750 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001751 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001752 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001753 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001754
Bart Van Assche24c48302011-05-21 18:32:29 +02001755 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756 * special casing it there for the various failure cases.
1757 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001758 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001759 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001760 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001761 /* else: nothing. handled from drbd_disconnect...
1762 * I don't think we may complete this just yet
1763 * in case we are "on-disconnect: freeze" */
1764
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001765 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001766}
1767
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001768static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001769{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001770 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001771 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001772 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001773 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001774 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001775
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001776 peer_device = conn_peer_device(connection, pi->vnr);
1777 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001778 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001779 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001780
1781 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001782 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001783
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001784 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001785 /* data is submitted to disk within recv_resync_read.
1786 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001787 * or in drbd_peer_request_endio. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001788 err = recv_resync_read(peer_device, sector, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001789 } else {
1790 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001791 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001792
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001793 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001794
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001795 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796 }
1797
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001798 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001799
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001800 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001801}
1802
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001803static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001804 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001806 struct drbd_interval *i;
1807 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001809 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001810 if (!i->local)
1811 continue;
1812 req = container_of(i, struct drbd_request, i);
1813 if (req->rq_state & RQ_LOCAL_PENDING ||
1814 !(req->rq_state & RQ_POSTPONED))
1815 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001816 /* as it is RQ_POSTPONED, this will cause it to
1817 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001818 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001819 }
1820}
1821
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001822/*
1823 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001824 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001825static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001826{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001827 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001828 container_of(w, struct drbd_peer_request, w);
1829 struct drbd_peer_device *peer_device = peer_req->peer_device;
1830 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001831 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001832 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001833
Philipp Reisner303d1442011-04-13 16:24:47 -07001834 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001835 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001836 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1837 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001838 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001839 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001840 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001841 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001842 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001844 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001845 /* we expect it to be marked out of sync anyways...
1846 * maybe assert this? */
1847 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001848 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001849 }
1850 /* we delete from the conflict detection hash _after_ we sent out the
1851 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001852 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001853 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001854 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001855 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001856 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001857 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001858 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001859 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001860 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001861
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001862 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001863
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001864 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865}
1866
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001867static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001868{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001869 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001870 container_of(w, struct drbd_peer_request, w);
1871 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001872 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001873
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001874 err = drbd_send_ack(peer_device, ack, peer_req);
1875 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001876
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001877 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001878}
1879
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001880static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001881{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001882 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001883}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001884
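/* As the protocol version check below suggests, peers older than protocol
 * version 100 do not know P_RETRY_WRITE and get P_SUPERSEDED instead. */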
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001885static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001886{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001887 struct drbd_peer_request *peer_req =
1888 container_of(w, struct drbd_peer_request, w);
1889 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001890
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001891 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001892 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001893}
1894
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001895static bool seq_greater(u32 a, u32 b)
1896{
1897 /*
1898 * We assume 32-bit wrap-around here.
1899 * For 24-bit wrap-around, we would have to shift:
1900 * a <<= 8; b <<= 8;
1901 */
1902 return (s32)a - (s32)b > 0;
1903}
1904
1905static u32 seq_max(u32 a, u32 b)
1906{
1907 return seq_greater(a, b) ? a : b;
1908}
1909
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001910static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001911{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001912 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001913 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001914
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001915 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001916 spin_lock(&device->peer_seq_lock);
1917 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1918 device->peer_seq = newest_peer_seq;
1919 spin_unlock(&device->peer_seq_lock);
1920 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001921 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001922 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001923 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001924}
1925
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001926static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1927{
1928 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1929}
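/* overlaps() treats its (sector, byte length) pairs as half-open sector
 * ranges: e.g. s1=0, l1=4096 covers sectors [0, 8), which does not overlap
 * a request starting at s2=8 but does overlap one starting at s2=7. */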
1930
1931/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001932static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001933{
1934 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001935	bool rv = false;
1936
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001937 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001938 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001939 if (overlaps(peer_req->i.sector, peer_req->i.size,
1940 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001941			rv = true;
1942 break;
1943 }
1944 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001945 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001946
1947 return rv;
1948}
1949
Philipp Reisnerb411b362009-09-25 16:07:19 -07001950/* Called from receive_Data.
1951 * Synchronize packets on sock with packets on msock.
1952 *
1953 * This is here so even when a P_DATA packet traveling via sock overtakes an Ack
1954 * packet traveling on msock, they are still processed in the order they have
1955 * been sent.
1956 *
1957 * Note: we don't care for Ack packets overtaking P_DATA packets.
1958 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001959 * In case peer_seq is larger than device->peer_seq, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07001960 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001961 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07001962 * ourselves. Correctly handles 32bit wrap around.
1963 *
1964 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
1965 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
1966 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
1967 * 1<<11 == 2048 seconds aka ages for the 32bit wrap around...
1968 *
1969 * returns 0 if we may process the packet,
1970 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001971static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001972{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001973 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001974 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001975 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001976 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001977
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001978 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001979 return 0;
1980
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001981 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001982 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001983 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
1984 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001985 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001986 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001987
Philipp Reisnerb411b362009-09-25 16:07:19 -07001988 if (signal_pending(current)) {
1989 ret = -ERESTARTSYS;
1990 break;
1991 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02001992
1993 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001994 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02001995 rcu_read_unlock();
1996
1997 if (!tp)
1998 break;
1999
2000 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002001 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2002 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002003 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002004 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002005 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002006 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002007 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002008 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002009 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002010 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002011 break;
2012 }
2013 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002014 spin_unlock(&device->peer_seq_lock);
2015 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002016 return ret;
2017}
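
/* A minimal sketch (not used by the driver) of how wrap-around safe sequence
 * comparison can work, assuming seq_greater()/seq_max() follow the usual
 * serial number arithmetic:
 *
 *	static bool example_seq_greater(u32 a, u32 b)
 *	{
 *		return (s32)(a - b) > 0;
 *	}
 *
 * With that, example_seq_greater(2, 0xfffffffe) is true: a sequence number
 * that wrapped past 0 still compares as "newer" than one just below the
 * 32bit limit. */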

/* see also bio_flags_to_wire()
 * DRBD_REQ_*, because we need to semantically map the flags to data packet
 * flags and back. We may replicate to other kernel versions. */
static unsigned long wire_flags_to_bio(u32 dpf)
{
	return  (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
		(dpf & DP_FUA ? REQ_FUA : 0) |
		(dpf & DP_FLUSH ? REQ_FLUSH : 0) |
		(dpf & DP_DISCARD ? REQ_DISCARD : 0);
}
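
/* For illustration only: a P_DATA packet that was generated from a local
 * bio with REQ_FUA | REQ_FLUSH set is expected to arrive with DP_FUA and
 * DP_FLUSH set in p->dp_flags, so wire_flags_to_bio() maps it back to
 * REQ_FUA | REQ_FLUSH here; the round trip through bio_flags_to_wire() is
 * assumed to be lossless for these four flags. */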

static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}

static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup. Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				(i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			inc_unacked(device);
			peer_req->w.cb = superseded ? e_send_superseded :
						      e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			wake_asender(connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

 out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}

/* mirrored write */
static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	struct drbd_peer_request *peer_req;
	struct p_data *p = pi->data;
	u32 peer_seq = be32_to_cpu(p->seq_num);
	int rw = WRITE;
	u32 dp_flags;
	int err, tp;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (!get_ldev(device)) {
		int err2;

		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
		atomic_inc(&connection->current_epoch->epoch_size);
		err2 = drbd_drain_block(peer_device, pi->size);
		if (!err)
			err = err2;
		return err;
	}

	/*
	 * Corresponding put_ldev done either below (on various errors), or in
	 * drbd_peer_request_endio, if we successfully submit the data at the
	 * end of this function.
	 */

	sector = be64_to_cpu(p->sector);
	peer_req = read_in_block(peer_device, p->block_id, sector, pi->size);
	if (!peer_req) {
		put_ldev(device);
		return -EIO;
	}

	peer_req->w.cb = e_end_block;

	dp_flags = be32_to_cpu(p->dp_flags);
	rw |= wire_flags_to_bio(dp_flags);
	if (peer_req->pages == NULL) {
		D_ASSERT(device, peer_req->i.size == 0);
		D_ASSERT(device, dp_flags & DP_FLUSH);
	}

	if (dp_flags & DP_MAY_SET_IN_SYNC)
		peer_req->flags |= EE_MAY_SET_IN_SYNC;

	spin_lock(&connection->epoch_lock);
	peer_req->epoch = connection->current_epoch;
	atomic_inc(&peer_req->epoch->epoch_size);
	atomic_inc(&peer_req->epoch->active);
	spin_unlock(&connection->epoch_lock);

	rcu_read_lock();
	tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
	rcu_read_unlock();
	if (tp) {
		peer_req->flags |= EE_IN_INTERVAL_TREE;
		err = wait_for_and_update_peer_seq(peer_device, peer_seq);
		if (err)
			goto out_interrupted;
		spin_lock_irq(&device->resource->req_lock);
		err = handle_write_conflicts(device, peer_req);
		if (err) {
			spin_unlock_irq(&device->resource->req_lock);
			if (err == -ENOENT) {
				put_ldev(device);
				return 0;
			}
			goto out_interrupted;
		}
	} else {
		update_peer_seq(peer_device, peer_seq);
		spin_lock_irq(&device->resource->req_lock);
	}
	list_add(&peer_req->w.list, &device->active_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (device->state.conn == C_SYNC_TARGET)
		wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));

	if (peer_device->connection->agreed_pro_version < 100) {
		rcu_read_lock();
		switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
		case DRBD_PROT_C:
			dp_flags |= DP_SEND_WRITE_ACK;
			break;
		case DRBD_PROT_B:
			dp_flags |= DP_SEND_RECEIVE_ACK;
			break;
		}
		rcu_read_unlock();
	}

	if (dp_flags & DP_SEND_WRITE_ACK) {
		peer_req->flags |= EE_SEND_WRITE_ACK;
		inc_unacked(device);
		/* corresponding dec_unacked() in e_end_block(),
		 * respectively in _drbd_clear_done_ee */
	}

	if (dp_flags & DP_SEND_RECEIVE_ACK) {
		/* I really don't like it that the receiver thread
		 * sends on the msock, but anyways */
		drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
	}

	if (device->state.pdsk < D_INCONSISTENT) {
		/* In case we have the only disk of the cluster, */
		drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
		peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
		peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
		drbd_al_begin_io(device, &peer_req->i, true);
	}

	err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
	if (!err)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	drbd_remove_epoch_entry_interval(device, peer_req);
	spin_unlock_irq(&device->resource->req_lock);
	if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
		drbd_al_complete_io(device, &peer_req->i);

out_interrupted:
	drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return err;
}
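
/* Protocol note (summary, not normative): with agreed_pro_version < 100 the
 * ack policy is derived from the configured wire protocol above -- protocol C
 * sets DP_SEND_WRITE_ACK (write ack only after the write completed on stable
 * storage, via e_end_block), protocol B sets DP_SEND_RECEIVE_ACK (P_RECV_ACK
 * as soon as the data was received), and protocol A sets neither, so no
 * per-write ack is sent. Peers with agreed_pro_version >= 100 are assumed to
 * set these DP_* bits in the packet themselves. */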

/* We may throttle resync, if the lower device seems to be busy,
 * and current sync rate is above c_min_rate.
 *
 * To decide whether or not the lower device is busy, we use a scheme similar
 * to MD RAID is_mddev_idle(): if the partition stats reveal a "significant"
 * amount of activity (more than 64 sectors) that we cannot account for with
 * our own resync activity, it obviously is "busy".
 *
 * The current sync rate used here uses only the most recent two step marks,
 * to have a short time average so we can react faster.
 */
int drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;
	struct lc_element *tmp;
	int curr_events;
	int throttle = 0;
	unsigned int c_min_rate;

	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return 0;

	spin_lock_irq(&device->al_lock);
	tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
	if (tmp) {
		struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
		if (test_bit(BME_PRIORITY, &bm_ext->flags)) {
			spin_unlock_irq(&device->al_lock);
			return 0;
		}
		/* Do not slow down if app IO is already waiting for this extent */
	}
	spin_unlock_irq(&device->al_lock);

	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
		      atomic_read(&device->rs_sect_ev);

	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		if (dbdt > c_min_rate)
			throttle = 1;
	}
	return throttle;
}
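
/* Worked example with made-up numbers: each bitmap bit covers one 4 KiB
 * block, so with dt = 3 seconds and db = 3072 bits resynced since the
 * chosen sync mark, dbdt = Bit2KB(3072 / 3) = 4096 KiB/s. Resync is then
 * throttled only if that rate exceeds the configured c_min_rate (in KiB/s)
 * and the backing device showed unaccounted activity above. */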


static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY, p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possible payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * one request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	inc_unacked(device);
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}

/**
 * drbd_asb_recover_0p  -  Recover after split-brain with no remaining primaries
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = 1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
			  "Using discard-least-changes instead\n");
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv = 1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if (ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv = 1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv = 1;
	}

	return rv;
}
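
/* Return value convention assumed throughout these recovery helpers:
 * 1 means the local node's data survives (the peer gets discarded and
 * becomes sync target), -1 means the local data is discarded, and -100
 * means no automatic recovery strategy could decide; the caller maps
 * that to a disconnect. */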

/**
 * drbd_asb_recover_1p  -  Recover after split-brain with one remaining primary
 */
static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_1p;

	rcu_read_lock();
	after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
	rcu_read_unlock();
	switch (after_sb_1p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_DISCARD_ZERO_CHG:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CONSENSUS:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_SECONDARY)
			rv = hg;
		if (hg == 1 && device->state.role == R_PRIMARY)
			rv = hg;
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCARD_SECONDARY:
		return device->state.role == R_PRIMARY ? 1 : -1;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1 && device->state.role == R_PRIMARY) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}

/**
 * drbd_asb_recover_2p  -  Recover after split-brain with two remaining primaries
 */
static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;
	enum drbd_after_sb_p after_sb_2p;

	rcu_read_lock();
	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
	rcu_read_unlock();
	switch (after_sb_2p) {
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_DISCARD_ZERO_CHG:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_VIOLENTLY:
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		if (hg == -1) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}

static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
			   u64 bits, u64 flags)
{
	if (!uuid) {
		drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
		return;
	}
	drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
		  text,
		  (unsigned long long)uuid[UI_CURRENT],
		  (unsigned long long)uuid[UI_BITMAP],
		  (unsigned long long)uuid[UI_HISTORY_START],
		  (unsigned long long)uuid[UI_HISTORY_END],
		  (unsigned long long)bits,
		  (unsigned long long)flags);
}

/*
  100	after split brain try auto recover
    2	C_SYNC_SOURCE set BitMap
    1	C_SYNC_SOURCE use BitMap
    0	no Sync
   -1	C_SYNC_TARGET use BitMap
   -2	C_SYNC_TARGET set BitMap
 -100	after split brain, disconnect
-1000	unrelated data
-1091	requires proto 91
-1096	requires proto 96
 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002784static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002785{
2786 u64 self, peer;
2787 int i, j;
2788
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002789 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2790 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791
2792 *rule_nr = 10;
2793 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2794 return 0;
2795
2796 *rule_nr = 20;
2797 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2798 peer != UUID_JUST_CREATED)
2799 return -2;
2800
2801 *rule_nr = 30;
2802 if (self != UUID_JUST_CREATED &&
2803 (peer == UUID_JUST_CREATED || peer == (u64)0))
2804 return 2;
2805
2806 if (self == peer) {
2807 int rct, dc; /* roles at crash time */
2808
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002809 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002810
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002811 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002812 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002813
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002814 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2815 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002816 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002817 drbd_uuid_move_history(device);
2818 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2819 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002821 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2822 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823 *rule_nr = 34;
2824 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002825 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002826 *rule_nr = 36;
2827 }
2828
2829 return 1;
2830 }
2831
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002832 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002833
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002834 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002835 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002836
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002837 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2838 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002839 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002840
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002841 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2842 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2843 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002845 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002846 *rule_nr = 35;
2847 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002848 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002849 *rule_nr = 37;
2850 }
2851
2852 return -1;
2853 }
2854
2855 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002856 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2857 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002858 /* lowest bit is set when we were primary,
2859 * next bit (weight 2) is set when peer was primary */
2860 *rule_nr = 40;
2861
2862 switch (rct) {
2863 case 0: /* !self_pri && !peer_pri */ return 0;
2864 case 1: /* self_pri && !peer_pri */ return 1;
2865 case 2: /* !self_pri && peer_pri */ return -1;
2866 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002867 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002868 return dc ? -1 : 1;
2869 }
2870 }
2871
2872 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002873 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002874 if (self == peer)
2875 return -1;
2876
2877 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002878 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002879 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002880 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002881 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2882 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2883 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 /* The last P_SYNC_UUID did not get though. Undo the last start of
2885 resync as sync source modifications of the peer's UUIDs. */
2886
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002887 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002888 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002889
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002890 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2891 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002892
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002893 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002894 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002895
Philipp Reisnerb411b362009-09-25 16:07:19 -07002896 return -1;
2897 }
2898 }
2899
2900 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002901 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002902 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002903 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002904 if (self == peer)
2905 return -2;
2906 }
2907
2908 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002909 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2910 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002911 if (self == peer)
2912 return 1;
2913
2914 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002915 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002916 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002917 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002918 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2919 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2920 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002921 /* The last P_SYNC_UUID did not get though. Undo the last start of
2922 resync as sync source modifications of our UUIDs. */
2923
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002924 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002925 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002926
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002927 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2928 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002930 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002931 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2932 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002933
2934 return 1;
2935 }
2936 }
2937
2938
2939 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002940 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002941 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002942 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002943 if (self == peer)
2944 return 2;
2945 }
2946
2947 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002948 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2949 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002950 if (self == peer && self != ((u64)0))
2951 return 100;
2952
2953 *rule_nr = 100;
2954 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002955 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002956 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002957 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002958 if (self == peer)
2959 return -100;
2960 }
2961 }
2962
2963 return -1000;
2964}
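
/*
 * Summary of drbd_uuid_compare()'s return value ("hg"), as interpreted by
 * drbd_sync_handshake() below:
 *	       0  peers are in sync
 *	  1 / -1  we become sync source / sync target (bitmap-based resync)
 *	  2 / -2  we become sync source / sync target (full resync)
 *	100/-100  split brain detected (-100: sync direction unknown)
 *	   -1000  unrelated data
 *	 < -1000  both sides need at least protocol (-hg - 1000);
 *		  the -1091 above means "needs at least protocol 91"
 */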
2965
2966/* drbd_sync_handshake() returns the new conn state on success, or
2967 C_MASK (-1) on failure.
2968 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002969static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
2970 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002971 enum drbd_disk_state peer_disk) __must_hold(local)
2972{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002973 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002974 enum drbd_conns rv = C_MASK;
2975 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002976 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02002977 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002978
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002979 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002980 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002981 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002982
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002983 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02002984
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002985 spin_lock_irq(&device->ldev->md.uuid_lock);
2986 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
2987 drbd_uuid_dump(device, "peer", device->p_uuid,
2988 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002989
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002990 hg = drbd_uuid_compare(device, &rule_nr);
2991 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002992
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002993 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002994
2995 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002996 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002997 return C_MASK;
2998 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01002999 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003000 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003001 return C_MASK;
3002 }
3003
3004 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3005 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3006 int f = (hg == -100) || abs(hg) == 2;
3007 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3008 if (f)
3009 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003010 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011 hg > 0 ? "source" : "target");
3012 }
3013
Adam Gandelman3a11a482010-04-08 16:48:23 -07003014 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003015 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003016
Philipp Reisner44ed1672011-04-19 17:10:19 +02003017 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003018 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003019
3020 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003021 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003022 + (peer_role == R_PRIMARY);
3023 int forced = (hg == -100);
3024
3025 switch (pcount) {
3026 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003027 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003028 break;
3029 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003030 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003031 break;
3032 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003033 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003034 break;
3035 }
3036 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003037 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003038 "automatically solved. Sync from %s node\n",
3039 pcount, (hg < 0) ? "peer" : "this");
3040 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003041 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003042 " UUIDs were ambiguous.\n");
3043 hg = hg*2;
3044 }
3045 }
3046 }
3047
3048 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003049 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003050 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003051 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003052 hg = 1;
3053
3054 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003055 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003056 "Sync from %s node\n",
3057 (hg < 0) ? "peer" : "this");
3058 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003059 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003060 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003061 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062
3063 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003064 /* FIXME this log message is not correct if we end up here
3065 * after an attempted attach on a diskless node.
3066 * We just refuse to attach -- well, we drop the "connection"
3067 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003068 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003069 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003070 return C_MASK;
3071 }
3072
3073 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003074 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003075 return C_MASK;
3076 }
3077
3078 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003079 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003080 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003081 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003082 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 /* fall through */
3084 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003085 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003086 return C_MASK;
3087 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003088 drbd_warn(device, "Becoming SyncTarget, violating the stable-data "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 "assumption\n");
3090 }
3091 }
3092
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003093 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003094 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003095 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003096 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003097 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.\n",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003098 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3099 abs(hg) >= 2 ? "full" : "bit-map based");
3100 return C_MASK;
3101 }
3102
Philipp Reisnerb411b362009-09-25 16:07:19 -07003103 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003104 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003105 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003106 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003107 return C_MASK;
3108 }
3109
3110 if (hg > 0) { /* become sync source. */
3111 rv = C_WF_BITMAP_S;
3112 } else if (hg < 0) { /* become sync target */
3113 rv = C_WF_BITMAP_T;
3114 } else {
3115 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003116 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003117 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003118 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003119 }
3120 }
3121
3122 return rv;
3123}
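
/*
 * Automatic split-brain recovery (hg == 100, or hg == -100 with
 * always-asbp) dispatches on the number of current primaries:
 * 0 -> after-sb-0pri, 1 -> after-sb-1pri, 2 -> after-sb-2pri.
 * A resulting |hg| < 100 means the policy picked a victim; if the
 * direction had been forced (hg was -100), hg is doubled to request a
 * full sync, since ambiguous UUIDs cannot seed a bitmap-based one.
 */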
3124
Philipp Reisnerf179d762011-05-16 17:31:47 +02003125static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003126{
3127 /* the asymmetric pairing ASB_DISCARD_REMOTE / ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003128 if (peer == ASB_DISCARD_REMOTE)
3129 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003130
3131 /* any other combination involving ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL is invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003132 if (peer == ASB_DISCARD_LOCAL)
3133 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003134
3135 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003136 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137}
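
/*
 * Example: the peer is configured with "after-sb-0pri discard-remote".
 * Seen from its side, "remote" is us, so convert_after_sb() maps it to
 * ASB_DISCARD_LOCAL before receive_protocol() compares it with our own
 * setting; the configurations are compatible only if we run with
 * "discard-local", i.e. both sides agree on whose data is discarded.
 */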
3138
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003139static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003140{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003141 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003142 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3143 int p_proto, p_discard_my_data, p_two_primaries, cf;
3144 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3145 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003146 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003147 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003148
Philipp Reisnerb411b362009-09-25 16:07:19 -07003149 p_proto = be32_to_cpu(p->protocol);
3150 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3151 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3152 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003153 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003154 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003155 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003156
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003157 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003158 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003159
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003160 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003161 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003162 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003163 if (err)
3164 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003165 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003166 }
3167
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003168 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003169 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003170
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003171 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003172 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003173
3174 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003175 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003176
3177 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003178 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003179 goto disconnect_rcu_unlock;
3180 }
3181
3182 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003183 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003184 goto disconnect_rcu_unlock;
3185 }
3186
3187 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003188 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003189 goto disconnect_rcu_unlock;
3190 }
3191
3192 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003193 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003194 goto disconnect_rcu_unlock;
3195 }
3196
3197 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003198 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003199 goto disconnect_rcu_unlock;
3200 }
3201
3202 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003203 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003204 goto disconnect_rcu_unlock;
3205 }
3206
3207 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003208 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003209 goto disconnect_rcu_unlock;
3210 }
3211
3212 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003213 }
3214
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003215 if (integrity_alg[0]) {
3216 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003217
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003218 /*
3219 * We can only change the peer data integrity algorithm
3220 * here. Changing our own data integrity algorithm
3221 * requires that we send a P_PROTOCOL_UPDATE packet at
3222 * the same time; otherwise, the peer has no way to
3223 * tell between which packets the algorithm should
3224 * change.
3225 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003227 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3228 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003229 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003230 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003231 goto disconnect;
3232 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003233
3234 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3235 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3236 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3237 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003238 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003239 goto disconnect;
3240 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003241 }
3242
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003243 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3244 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003245 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003246 goto disconnect;
3247 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003248
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003249 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003250 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003251 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003252 *new_net_conf = *old_net_conf;
3253
3254 new_net_conf->wire_protocol = p_proto;
3255 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3256 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3257 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3258 new_net_conf->two_primaries = p_two_primaries;
3259
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003260 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003261 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003262 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003263
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003264 crypto_free_hash(connection->peer_integrity_tfm);
3265 kfree(connection->int_dig_in);
3266 kfree(connection->int_dig_vv);
3267 connection->peer_integrity_tfm = peer_integrity_tfm;
3268 connection->int_dig_in = int_dig_in;
3269 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003270
3271 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003272 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003273 integrity_alg[0] ? integrity_alg : "(none)");
3274
3275 synchronize_rcu();
3276 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003277 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003278
Philipp Reisner44ed1672011-04-19 17:10:19 +02003279disconnect_rcu_unlock:
3280 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003281disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003282 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003283 kfree(int_dig_in);
3284 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003285 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003286 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003287}
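
/*
 * Note: unless the packet is P_PROTOCOL_UPDATE, the values received above
 * (wire protocol, after-sb-0/1/2pri, allow-two-primaries, discard-my-data,
 * data-integrity-alg) must be compatible with our own net_conf, or we
 * disconnect.  In either case the peer's values are then installed in a
 * freshly allocated net_conf; for P_PROTOCOL_UPDATE this is how both
 * sides switch settings, including the peer data integrity algorithm,
 * in lockstep.
 */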
3288
3289/* helper function
3290 * input: alg name, feature name
3291 * return: NULL (alg name was "")
3292 * ERR_PTR(error) if something goes wrong
3293 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303294static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003295struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003296 const char *alg, const char *name)
3297{
3298 struct crypto_hash *tfm;
3299
3300 if (!alg[0])
3301 return NULL;
3302
3303 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3304 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003305 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003306 alg, name, PTR_ERR(tfm));
3307 return tfm;
3308 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309 return tfm;
3310}
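
/*
 * Typical caller pattern (as in receive_SyncParam() below):
 *
 *	tfm = drbd_crypto_alloc_digest_safe(device, p->verify_alg, "verify-alg");
 *	if (IS_ERR(tfm)) {
 *		tfm = NULL;
 *		goto disconnect;
 *	}
 *	(a NULL return is not an error: it means no algorithm was configured)
 */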
3311
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003312static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003313{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003314 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003315 int size = pi->size;
3316
3317 while (size) {
3318 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003319 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003320 if (s <= 0) {
3321 if (s < 0)
3322 return s;
3323 break;
3324 }
3325 size -= s;
3326 }
3327 if (size)
3328 return -EIO;
3329 return 0;
3330}
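
/*
 * ignore_remaining_packet() drains pi->size payload bytes, in chunks of
 * at most DRBD_SOCKET_BUFFER_SIZE, through the regular receive buffer and
 * returns -EIO if the stream ends before the advertised payload arrived.
 */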
3331
3332/*
3333 * config_unknown_volume - device configuration command for unknown volume
3334 *
3335 * When a device is added to an existing connection, the node on which the
3336 * device is added first will send configuration commands to its peer but the
3337 * peer will not know about the device yet. It will warn and ignore these
3338 * commands. Once the device is added on the second node, the second node will
3339 * send the same device configuration commands, but in the other direction.
3340 *
3341 * (We can also end up here if drbd is misconfigured.)
3342 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003343static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003344{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003345 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003346 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003347 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003348}
3349
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003350static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003351{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003352 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003353 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003354 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003355 unsigned int header_size, data_size, exp_max_sz;
3356 struct crypto_hash *verify_tfm = NULL;
3357 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003358 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003359 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003360 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003361 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003362 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003363 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003364
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003365 peer_device = conn_peer_device(connection, pi->vnr);
3366 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003367 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003368 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003369
3370 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3371 : apv == 88 ? sizeof(struct p_rs_param)
3372 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003373 : apv <= 94 ? sizeof(struct p_rs_param_89)
3374 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003375
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003376 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003377 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003378 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003379 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003380 }
3381
3382 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003383 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003384 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003385 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003386 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003387 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003388 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003389 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003390 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003391 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003392 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003393 }
3394
3395 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003396 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003397 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3398
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003399 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003400 if (err)
3401 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003402
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003403 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003404 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003405 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003406 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3407 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003408 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003409 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003410 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003411 return -ENOMEM;
3412 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003413
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003414 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003415 *new_disk_conf = *old_disk_conf;
3416
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003417 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003418 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003419
3420 if (apv >= 88) {
3421 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003422 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003423 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003424 "peer wants %u, accepting only up to %u bytes\n",
3425 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003426 err = -EIO;
3427 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003428 }
3429
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003430 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003431 if (err)
3432 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003433 /* we expect NUL terminated string */
3434 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003435 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003436 p->verify_alg[data_size-1] = 0;
3437
3438 } else /* apv >= 89 */ {
3439 /* we still expect NUL terminated strings */
3440 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003441 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3442 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003443 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3444 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3445 }
3446
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003447 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003448 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003449 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003450 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003451 goto disconnect;
3452 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003453 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454 p->verify_alg, "verify-alg");
3455 if (IS_ERR(verify_tfm)) {
3456 verify_tfm = NULL;
3457 goto disconnect;
3458 }
3459 }
3460
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003461 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003462 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003463 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003464 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003465 goto disconnect;
3466 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003467 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003468 p->csums_alg, "csums-alg");
3469 if (IS_ERR(csums_tfm)) {
3470 csums_tfm = NULL;
3471 goto disconnect;
3472 }
3473 }
3474
Philipp Reisner813472c2011-05-03 16:47:02 +02003475 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003476 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3477 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3478 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3479 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003480
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003481 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003482 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003483 new_plan = fifo_alloc(fifo_size);
3484 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003485 drbd_err(device, "kmalloc of fifo_buffer failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003486 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003487 goto disconnect;
3488 }
3489 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003490 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003491
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003492 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003493 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3494 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003495 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003496 goto disconnect;
3497 }
3498
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003499 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003500
3501 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003502 strcpy(new_net_conf->verify_alg, p->verify_alg);
3503 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003504 crypto_free_hash(peer_device->connection->verify_tfm);
3505 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003506 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003507 }
3508 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003509 strcpy(new_net_conf->csums_alg, p->csums_alg);
3510 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003511 crypto_free_hash(peer_device->connection->csums_tfm);
3512 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003513 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003514 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003515 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003516 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003517 }
3518
Philipp Reisner813472c2011-05-03 16:47:02 +02003519 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003520 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3521 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003522 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003523
3524 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003525 old_plan = device->rs_plan_s;
3526 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003527 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003528
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003529 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003530 synchronize_rcu();
3531 if (new_net_conf)
3532 kfree(old_net_conf);
3533 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003534 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003535
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003536 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003537
Philipp Reisner813472c2011-05-03 16:47:02 +02003538reconnect:
3539 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003540 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003541 kfree(new_disk_conf);
3542 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003543 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003544 return -EIO;
3545
Philipp Reisnerb411b362009-09-25 16:07:19 -07003546disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003547 kfree(new_plan);
3548 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003549 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003550 kfree(new_disk_conf);
3551 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003552 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003553 /* just for completeness: actually not needed,
3554 * as this is not reached if csums_tfm was ok. */
3555 crypto_free_hash(csums_tfm);
3556 /* but free the verify_tfm again, if csums_tfm did not work out */
3557 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003558 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003559 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003560}
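
/*
 * P_SYNC_PARAM layout per protocol version, as decoded above:
 * apv <= 87: bare struct p_rs_param; apv == 88: p_rs_param followed by
 * up to SHARED_SECRET_MAX bytes of trailing verify-alg name;
 * apv 89..94: struct p_rs_param_89 with verify-alg and csums-alg inline;
 * apv >= 95: struct p_rs_param_95, which adds the c_plan_ahead,
 * c_delay_target, c_fill_target and c_max_rate resync controller fields.
 */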
3561
Philipp Reisnerb411b362009-09-25 16:07:19 -07003562/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003563static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003564 const char *s, sector_t a, sector_t b)
3565{
3566 sector_t d;
3567 if (a == 0 || b == 0)
3568 return;
3569 d = (a > b) ? (a - b) : (b - a);
3570 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003571 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003572 (unsigned long long)a, (unsigned long long)b);
3573}
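
/*
 * "more than 12.5%" is implemented as d > a>>3 or d > b>>3.  For example,
 * a = 1000, b = 800: d = 200 > 1000>>3 = 125, so we warn; with
 * a = 1000, b = 900: d = 100 exceeds neither 125 nor 900>>3 = 112,
 * so we stay silent.
 */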
3574
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003575static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003577 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003578 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003579 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003580 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003581 sector_t p_size, p_usize, my_usize;
3582 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003583 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003584
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003585 peer_device = conn_peer_device(connection, pi->vnr);
3586 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003587 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003588 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003589
Philipp Reisnerb411b362009-09-25 16:07:19 -07003590 p_size = be64_to_cpu(p->d_size);
3591 p_usize = be64_to_cpu(p->u_size);
3592
Philipp Reisnerb411b362009-09-25 16:07:19 -07003593 /* just store the peer's disk size for now.
3594 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003595 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003596
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003597 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003598 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003599 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003600 rcu_read_unlock();
3601
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003602 warn_if_differ_considerably(device, "lower level device sizes",
3603 p_size, drbd_get_max_capacity(device->ldev));
3604 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003605 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003606
3607 /* if this is the first connect, or an otherwise expected
3608 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003609 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003610 p_usize = min_not_zero(my_usize, p_usize);
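			/* min_not_zero() treats 0 as "no explicit user size":
			 * e.g. my_usize = 0, p_usize = 409600 yields 409600,
			 * while my_usize = 204800, p_usize = 409600 yields
			 * the smaller 204800. */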
Philipp Reisnerb411b362009-09-25 16:07:19 -07003611
3612 /* Never shrink a device with usable data during connect.
3613 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003614 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3615 drbd_get_capacity(device->this_bdev) &&
3616 device->state.disk >= D_OUTDATED &&
3617 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003618 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003619 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003620 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003621 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003622 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003623
3624 if (my_usize != p_usize) {
3625 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3626
3627 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3628 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003629 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003630 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003631 return -ENOMEM;
3632 }
3633
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003634 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003635 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003636 *new_disk_conf = *old_disk_conf;
3637 new_disk_conf->disk_size = p_usize;
3638
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003639 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003640 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003641 synchronize_rcu();
3642 kfree(old_disk_conf);
3643
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003644 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003645 (unsigned long)p_usize);
3646 }
3647
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003648 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003649 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003650
Philipp Reisnere89b5912010-03-24 17:11:33 +01003651 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003652 if (get_ldev(device)) {
3653 dd = drbd_determine_dev_size(device, ddsf, NULL);
3654 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003655 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003656 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003657 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003658 } else {
3659 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003660 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003661 }
3662
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003663 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3664 drbd_reconsider_max_bio_size(device);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003665
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003666 if (get_ldev(device)) {
3667 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3668 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003669 ldsc = 1;
3670 }
3671
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003672 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003673 }
3674
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003675 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003676 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003677 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003678 /* we have different sizes, probably peer
3679 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003680 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003681 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003682 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3683 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3684 if (device->state.pdsk >= D_INCONSISTENT &&
3685 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003686 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003687 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003688 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003689 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003690 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003691 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003692 }
3693 }
3694
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003695 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003696}
3697
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003698static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003699{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003700 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003701 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003702 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003703 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003704 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003705
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003706 peer_device = conn_peer_device(connection, pi->vnr);
3707 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003708 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003709 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003710
Philipp Reisnerb411b362009-09-25 16:07:19 -07003711 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003712 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003713 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003714 return -ENOMEM;
3715 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716
3717 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3718 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3719
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003720 kfree(device->p_uuid);
3721 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003722
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003723 if (device->state.conn < C_CONNECTED &&
3724 device->state.disk < D_INCONSISTENT &&
3725 device->state.role == R_PRIMARY &&
3726 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003727 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003728 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003729 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003730 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003731 }
3732
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003733 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003734 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003735 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003736 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003737 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003738 (p_uuid[UI_FLAGS] & 8);
3739 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003740 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003741 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003742 "clear_n_write from receive_uuids",
3743 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003744 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3745 _drbd_uuid_set(device, UI_BITMAP, 0);
3746 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003747 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003748 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003749 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003750 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003751 put_ldev(device);
3752 } else if (device->state.disk < D_INCONSISTENT &&
3753 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003754 /* I am a diskless primary, the peer just created a new current UUID
3755 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003756 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757 }
3758
3759 /* Before we test for the disk state, we should wait until a possibly
3760 ongoing cluster-wide state change has finished. That is important if
3761 we are primary and are detaching from our disk. We need to see the
3762 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003763 mutex_lock(device->state_mutex);
3764 mutex_unlock(device->state_mutex);
3765 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3766 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003767
3768 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003769 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003771 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003772}
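
/*
 * The skip_initial_sync shortcut above requires all of: we are already
 * Connected, the peer speaks protocol >= 90, our current UUID is still
 * UUID_JUST_CREATED, and the peer set bit 3 (value 8) in UI_FLAGS.  The
 * node then adopts the peer's current UUID, clears the bitmap UUID and
 * the on-disk bitmap, and moves disk and pdsk straight to D_UP_TO_DATE,
 * skipping the initial full sync.
 */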
3773
3774/**
3775 * convert_state() - Converts the peer's view of the cluster state to our point of view
3776 * @ps: The state as seen by the peer.
3777 */
3778static union drbd_state convert_state(union drbd_state ps)
3779{
3780 union drbd_state ms;
3781
3782 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003783 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003784 [C_CONNECTED] = C_CONNECTED,
3785
3786 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3787 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3788 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3789 [C_VERIFY_S] = C_VERIFY_T,
3790 [C_MASK] = C_MASK,
3791 };
3792
3793 ms.i = ps.i;
3794
3795 ms.conn = c_tab[ps.conn];
3796 ms.peer = ps.role;
3797 ms.role = ps.peer;
3798 ms.pdsk = ps.disk;
3799 ms.disk = ps.pdsk;
3800 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3801
3802 return ms;
3803}
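
/*
 * Example: the peer reports { role = Primary, peer = Secondary,
 * disk = UpToDate, pdsk = Inconsistent, conn = StartingSyncS }.
 * Seen from our side this becomes { role = Secondary, peer = Primary,
 * disk = Inconsistent, pdsk = UpToDate, conn = StartingSyncT }.
 */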
3804
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003805static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003806{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003807 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003808 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003809 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003810 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003811 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003812
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003813 peer_device = conn_peer_device(connection, pi->vnr);
3814 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003815 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003816 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003817
Philipp Reisnerb411b362009-09-25 16:07:19 -07003818 mask.i = be32_to_cpu(p->mask);
3819 val.i = be32_to_cpu(p->val);
3820
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003821 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003822 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003823 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003824 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003825 }
3826
3827 mask = convert_state(mask);
3828 val = convert_state(val);
3829
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003830 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003831 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003832
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003833 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003835 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836}
3837
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003838static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003839{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003840 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003841 union drbd_state mask, val;
3842 enum drbd_state_rv rv;
3843
3844 mask.i = be32_to_cpu(p->mask);
3845 val.i = be32_to_cpu(p->val);
3846
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003847 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3848 mutex_is_locked(&connection->cstate_mutex)) {
3849 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003850 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003851 }
3852
3853 mask = convert_state(mask);
3854 val = convert_state(val);
3855
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003856 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3857 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003858
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003859 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003860}
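
/*
 * receive_req_state() and receive_req_conn_state() mirror each other:
 * the former applies a remote state-change request to a single device,
 * the latter to the whole connection.  On the side that has
 * RESOLVE_CONFLICTS set, both decline with SS_CONCURRENT_ST_CHG while a
 * local state change is in flight, so requests issued simultaneously by
 * both peers can be resolved in favor of one side.
 */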
3861
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003862static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003863{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003864 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003865 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003866 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003867 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003868 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003869 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003870 int rv;
3871
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003872 peer_device = conn_peer_device(connection, pi->vnr);
3873 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003874 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003875 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003876
Philipp Reisnerb411b362009-09-25 16:07:19 -07003877 peer_state.i = be32_to_cpu(p->state);
3878
3879 real_peer_disk = peer_state.disk;
3880 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003881 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003882 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003883 }
3884
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003885 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003886 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003887 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003888 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003889
Lars Ellenberg545752d2011-12-05 14:39:25 +01003890 /* If some other part of the code (asender thread, timeout)
3891 * already decided to close the connection again,
3892 * we must not "re-establish" it here. */
3893 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003894 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003895
Lars Ellenberg40424e42011-09-26 15:24:56 +02003896 /* If this is the "end of sync" confirmation, usually the peer disk
3897 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3898 * set) resync started in PausedSyncT, or if the timing of pause-/
3899 * unpause-sync events has been "just right", the peer disk may
3900 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3901 */
3902 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3903 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003904 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3905 /* If we are (becoming) SyncSource, but peer is still in sync
3906 * preparation, ignore its uptodate-ness to avoid flapping, it
3907 * will change to inconsistent once the peer reaches active
3908 * syncing states.
3909 * It may have changed syncer-paused flags, however, so we
3910 * cannot ignore this completely. */
3911 if (peer_state.conn > C_CONNECTED &&
3912 peer_state.conn < C_SYNC_SOURCE)
3913 real_peer_disk = D_INCONSISTENT;
3914
3915 /* if peer_state changes to connected at the same time,
3916 * it explicitly notifies us that it finished resync.
3917 * Maybe we should finish it up, too? */
3918 else if (os.conn >= C_SYNC_SOURCE &&
3919 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003920 if (drbd_bm_total_weight(device) <= device->rs_failed)
3921 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003922 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003923 }
3924 }
3925
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003926 /* explicit verify finished notification, stop sector reached. */
3927 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3928 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003929 ov_out_of_sync_print(device);
3930 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003931 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003932 }
3933
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003934 /* peer says his disk is inconsistent, while we think it is uptodate,
3935 * and this happens while the peer still thinks we have a sync going on,
3936 * but we think we are already done with the sync.
3937 * We ignore this to avoid flapping pdsk.
3938 * This should not happen if the peer is a recent version of drbd. */
3939 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
3940 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
3941 real_peer_disk = D_UP_TO_DATE;
3942
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003943 if (ns.conn == C_WF_REPORT_PARAMS)
3944 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003945
Philipp Reisner67531712010-10-27 12:21:30 +02003946 if (peer_state.conn == C_AHEAD)
3947 ns.conn = C_BEHIND;
3948
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003949 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
3950 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951 int cr; /* consider resync */
3952
3953 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003954 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003955 /* if we had an established connection
3956 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003957 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003958 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003959 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003960 /* if we have both been inconsistent, and the peer has been
3961 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003962 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003963 /* if we had been plain connected, and the admin requested to
3964 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003965 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003966 (peer_state.conn >= C_STARTING_SYNC_S &&
3967 peer_state.conn <= C_WF_BITMAP_T));
3968
3969 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003970 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003971
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003972 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003973 if (ns.conn == C_MASK) {
3974 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003975 if (device->state.disk == D_NEGOTIATING) {
3976 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003977 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003978 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003979 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01003980 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003981 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003982 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003983 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003984 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003985 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003986 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003987 }
3988 }
3989 }
3990
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003991 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003992 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003993 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003994 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003995 ns.peer = peer_state.role;
3996 ns.pdsk = real_peer_disk;
3997 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003998 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003999 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004000 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004001 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4002 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004003 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004004 for temporary network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004005 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004006 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004007 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004008 drbd_uuid_new_current(device);
4009 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004010 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004011 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004012 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004013 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4014 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004015 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004016
4017 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004018 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004019 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020 }
4021
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004022 if (os.conn > C_WF_REPORT_PARAMS) {
4023 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004024 peer_state.disk != D_NEGOTIATING) {
4025 /* we want resync, peer has not yet decided to sync... */
4026 /* Nowadays only used when forcing a node into primary role and
4027 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004028 drbd_send_uuids(peer_device);
4029 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004030 }
4031 }
4032
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004033 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004034
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004035 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004036
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004037 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004038}
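
/*
 * Illustrative sketch (not DRBD code) of the snapshot/recheck/retry
 * pattern used in receive_state() above: take a state snapshot under
 * the lock, work on it with the lock dropped, and redo everything if
 * the state changed in the meantime. A pthread mutex stands in for
 * the resource request lock; the single int state word is an
 * assumption for the example.
 */
#include <pthread.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int state;

static int compute_new_state(int os)
{
	return os;	/* placeholder for the real decision logic */
}

static void update_state(void)
{
	int os, ns;

	pthread_mutex_lock(&lock);
retry:
	os = state;
	pthread_mutex_unlock(&lock);

	ns = compute_new_state(os);	/* may block, lock not held */

	pthread_mutex_lock(&lock);
	if (os != state)
		goto retry;		/* state moved under us, redo */
	state = ns;
	pthread_mutex_unlock(&lock);
}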
4039
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004040static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004041{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004042 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004043 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004044 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004045
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004046 peer_device = conn_peer_device(connection, pi->vnr);
4047 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004048 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004049 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004050
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004051 wait_event(device->misc_wait,
4052 device->state.conn == C_WF_SYNC_UUID ||
4053 device->state.conn == C_BEHIND ||
4054 device->state.conn < C_CONNECTED ||
4055 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004056
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004057 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004058
Philipp Reisnerb411b362009-09-25 16:07:19 -07004059 /* Here the _drbd_uuid_ functions are right, current should
4060 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004061 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4062 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4063 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004064
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004065 drbd_print_uuids(device, "updated sync uuid");
4066 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004067
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004068 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004070 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004071
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004072 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004073}
4074
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004075/**
4076 * receive_bitmap_plain
4077 *
4078 * Return 0 when done, 1 when another iteration is needed, and a negative error
4079 * code upon failure.
4080 */
4081static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004082receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004083 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004084{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004085 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004086 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004087 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004088 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004089 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004090 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004091
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004092 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004093 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004094 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004095 }
4096 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004097 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004098 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004099 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004100 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004102 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103
4104 c->word_offset += num_words;
4105 c->bit_offset = c->word_offset * BITS_PER_LONG;
4106 if (c->bit_offset > c->bm_bits)
4107 c->bit_offset = c->bm_bits;
4108
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004109 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004110}
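
/*
 * Illustrative arithmetic for receive_bitmap_plain() above, assuming a
 * 4 KiB socket buffer, an 8-byte header and 8-byte longs: one plain
 * bitmap packet then carries at most (4096 - 8) / 8 = 511 words.
 */
#include <assert.h>

int main(void)
{
	unsigned int data_size = 4096 - 8;	/* buffer minus header */
	unsigned int bm_words = 1000, word_offset = 0, num_words;

	num_words = data_size / 8;		/* sizeof(long) on 64 bit */
	if (num_words > bm_words - word_offset)
		num_words = bm_words - word_offset;
	assert(num_words == 511);		/* first packet */
	word_offset += num_words;
	assert(bm_words - word_offset == 489);	/* left for the second */
	return 0;
}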
4111
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004112static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4113{
4114 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4115}
4116
4117static int dcbp_get_start(struct p_compressed_bm *p)
4118{
4119 return (p->encoding & 0x80) != 0;
4120}
4121
4122static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4123{
4124 return (p->encoding >> 4) & 0x7;
4125}
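
/*
 * Illustrative sketch (not DRBD code): how the dcbp_* helpers above
 * unpack the p_compressed_bm encoding byte. Bit 7 is the start
 * toggle, bits 6..4 the pad bit count, bits 3..0 the bitmap code;
 * the example value 0x94 is an assumption.
 */
#include <assert.h>

int main(void)
{
	unsigned char encoding = 0x94;		/* 1001 0100 */

	assert((encoding & 0x80) != 0);		/* first run sets bits */
	assert(((encoding >> 4) & 0x7) == 1);	/* one pad bit at the end */
	assert((encoding & 0x0f) == 4);		/* encoding code 4 */
	return 0;
}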
4126
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004127/**
4128 * recv_bm_rle_bits
4129 *
4130 * Return 0 when done, 1 when another iteration is needed, and a negative error
4131 * code upon failure.
4132 */
4133static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004134recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004135 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004136 struct bm_xfer_ctx *c,
4137 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004138{
4139 struct bitstream bs;
4140 u64 look_ahead;
4141 u64 rl;
4142 u64 tmp;
4143 unsigned long s = c->bit_offset;
4144 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004145 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004146 int have;
4147 int bits;
4148
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004149 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004150
4151 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4152 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004153 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004154
4155 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4156 bits = vli_decode_bits(&rl, look_ahead);
4157 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004158 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004159
4160 if (toggle) {
4161 e = s + rl - 1;
4162 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004163 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004164 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004165 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004166 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004167 }
4168
4169 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004170 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004171 have, bits, look_ahead,
4172 (unsigned int)(bs.cur.b - p->code),
4173 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004174 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004175 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004176 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4177 if (likely(bits < 64))
4178 look_ahead >>= bits;
4179 else
4180 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004181 have -= bits;
4182
4183 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4184 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004185 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004186 look_ahead |= tmp << have;
4187 have += bits;
4188 }
4189
4190 c->bit_offset = s;
4191 bm_xfer_ctx_bit_to_word_offset(c);
4192
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004193 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004194}
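
/*
 * Illustrative sketch (not DRBD code): the alternating run-length
 * scheme decoded by recv_bm_rle_bits() above, on a plain byte array
 * instead of the kernel bitmap API. The run lengths stand for values
 * already VLI-decoded from the bitstream.
 */
#include <stdio.h>

static void set_bits(unsigned char *bm, unsigned long s, unsigned long e)
{
	for (; s <= e; s++)
		bm[s / 8] |= 1 << (s % 8);
}

int main(void)
{
	unsigned char bm[4] = { 0 };
	unsigned long runs[] = { 5, 3, 7 };	/* clear, set, clear */
	unsigned long s = 0;
	int toggle = 0;				/* dcbp_get_start() value */
	int i;

	for (i = 0; i < 3; s += runs[i], toggle = !toggle, i++)
		if (toggle)
			set_bits(bm, s, s + runs[i] - 1);

	printf("%02x %02x\n", bm[0], bm[1]);	/* prints "e0 00" */
	return 0;
}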
4195
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004196/**
4197 * decode_bitmap_c
4198 *
4199 * Return 0 when done, 1 when another iteration is needed, and a negative error
4200 * code upon failure.
4201 */
4202static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004203decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004204 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004205 struct bm_xfer_ctx *c,
4206 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004207{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004208 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004209 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004210
4211 /* other variants had been implemented for evaluation,
4212 * but have been dropped as this one turned out to be "best"
4213 * during all our tests. */
4214
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004215 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4216 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004217 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004218}
4219
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004220void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004221 const char *direction, struct bm_xfer_ctx *c)
4222{
4223 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004224 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004225 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4226 unsigned int plain =
4227 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4228 c->bm_words * sizeof(unsigned long);
4229 unsigned int total = c->bytes[0] + c->bytes[1];
4230 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231
4232 /* total cannot be zero, but just in case: */
4233 if (total == 0)
4234 return;
4235
4236 /* don't report if not compressed */
4237 if (total >= plain)
4238 return;
4239
4240 /* total < plain. check for overflow, still */
4241 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4242 : (1000 * total / plain);
4243
4244 if (r > 1000)
4245 r = 1000;
4246
4247 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004248 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004249 "total %u; compression: %u.%u%%\n",
4250 direction,
4251 c->bytes[1], c->packets[1],
4252 c->bytes[0], c->packets[0],
4253 total, r/10, r % 10);
4254}
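
/*
 * Illustrative arithmetic for INFO_bm_xfer_stats() above (numbers are
 * assumptions): with plain = 4096 bytes and total = 512 bytes on the
 * wire, r = 1000 * 512 / 4096 = 125, then r = 1000 - 125 = 875, which
 * is printed as "compression: 87.5%".
 */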
4255
4256/* Since we are processing the bitfield from lower addresses to higher,
4257 it does not matter if we process it in 32 bit chunks or 64 bit
4258 chunks as long as it is little endian. (Understand it as byte stream,
4259 beginning with the lowest byte...) If we used big endian
4260 we would need to process it from the highest address to the lowest,
4261 in order to be agnostic to the 32 vs 64 bits issue.
4262
4263 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004264static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004265{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004266 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004267 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004268 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004269 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004270
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004271 peer_device = conn_peer_device(connection, pi->vnr);
4272 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004273 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004274 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004276 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004277 /* you are supposed to send additional out-of-sync information
4278 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004279
Philipp Reisnerb411b362009-09-25 16:07:19 -07004280 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004281 .bm_bits = drbd_bm_bits(device),
4282 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283 };
4284
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004285 for (;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004286 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004287 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004288 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004289 /* MAYBE: sanity check that we speak proto >= 90,
4290 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004291 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004292
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004293 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004294 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004295 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296 goto out;
4297 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004298 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004299 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004300 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004301 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004302 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004303 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004304 if (err)
4305 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004306 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004307 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004308 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)\n", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004309 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004310 goto out;
4311 }
4312
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004313 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004314 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004315
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004316 if (err <= 0) {
4317 if (err < 0)
4318 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004319 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004320 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004321 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004322 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004323 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004324 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004325
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004326 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004327
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004328 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004329 enum drbd_state_rv rv;
4330
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004331 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004332 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333 goto out;
4334 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004335 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004336 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004337 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004338 /* admin may have requested C_DISCONNECTING,
4339 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004340 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004341 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004342 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004343 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004346 drbd_bm_unlock(device);
4347 if (!err && device->state.conn == C_WF_BITMAP_S)
4348 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004349 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004350}
4351
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004352static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004353{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004354 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004355 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004357 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004358}
4359
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004360static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004362 /* Make sure we've acked all the TCP data associated
4363 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004364 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004365
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004366 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367}
4368
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004369static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004370{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004371 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004372 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004373 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004374
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004375 peer_device = conn_peer_device(connection, pi->vnr);
4376 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004377 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004378 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004379
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004380 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004381 case C_WF_SYNC_UUID:
4382 case C_WF_BITMAP_T:
4383 case C_BEHIND:
4384 break;
4385 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004386 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004387 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004388 }
4389
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004390 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004391
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004392 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004393}
4394
Philipp Reisner02918be2010-08-20 14:35:10 +02004395struct data_cmd {
4396 int expect_payload;
4397 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004398 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004399};
4400
Philipp Reisner02918be2010-08-20 14:35:10 +02004401static struct data_cmd drbd_cmd_handler[] = {
4402 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4403 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4404 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4405 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004406 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4407 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4408 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004409 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4410 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004411 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4412 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004413 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4414 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4415 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4416 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4417 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4418 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4419 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4420 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4421 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4422 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004423 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004424 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004425 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Philipp Reisner02918be2010-08-20 14:35:10 +02004426};
4427
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004428static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004429{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004430 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004431 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004432 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004433
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004434 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004435 struct data_cmd *cmd;
4436
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004437 drbd_thread_current_set_cpu(&connection->receiver);
4438 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004439 goto err_out;
4440
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004441 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004442 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004443 drbd_err(connection, "Unexpected data packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004444 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004445 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004446 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004447
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004448 shs = cmd->pkt_size;
4449 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004450 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004451 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004452 goto err_out;
4453 }
4454
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004455 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004456 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004457 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004458 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004459 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004460 }
4461
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004462 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004463 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004464 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004465 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004466 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004467 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004468 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004469 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004470
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004471 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004472 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004473}
4474
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004475static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004476{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004477 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004478 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004479 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004480
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004481 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004482 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004483
Lars Ellenberg545752d2011-12-05 14:39:25 +01004484 /* We are about to start the cleanup after connection loss.
4485 * Make sure drbd_make_request knows about that.
4486 * Usually we should be in some network failure state already,
4487 * but just in case we are not, we fix it up here.
4488 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004489 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004490
Philipp Reisnerb411b362009-09-25 16:07:19 -07004491 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004492 drbd_thread_stop(&connection->asender);
4493 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004494
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004495 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004496 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4497 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004498 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004499 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004500 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004501 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004502 rcu_read_lock();
4503 }
4504 rcu_read_unlock();
4505
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004506 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004507 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004508 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004509 atomic_set(&connection->current_epoch->epoch_size, 0);
4510 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004511
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004512 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004513
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004514 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4515 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004516
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004517 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004518 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004519 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004520 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004521
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004522 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004523
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004524 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004525 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004526}
4527
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004528static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004529{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004530 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004531 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004532
Philipp Reisner85719572010-07-21 10:20:17 +02004533 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004534 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004535 _drbd_wait_ee_list_empty(device, &device->active_ee);
4536 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4537 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004538 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004539
4540 /* We do not have data structures that would allow us to
4541 * get the rs_pending_cnt down to 0 again.
4542 * * On C_SYNC_TARGET we do not have any data structures describing
4543 * the pending RSDataRequest's we have sent.
4544 * * On C_SYNC_SOURCE there is no data structure that tracks
4545 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4546 * And no, it is not the sum of the reference counts in the
4547 * resync_LRU. The resync_LRU tracks the whole operation including
4548 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4549 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004550 drbd_rs_cancel_all(device);
4551 device->rs_total = 0;
4552 device->rs_failed = 0;
4553 atomic_set(&device->rs_pending_cnt, 0);
4554 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004555
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004556 del_timer_sync(&device->resync_timer);
4557 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004558
Philipp Reisnerb411b362009-09-25 16:07:19 -07004559 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4560 * w_make_resync_request etc. which may still be on the worker queue
4561 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004562 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004563
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004564 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004565
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004566 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4567 might have issued work again. The one before drbd_finish_peer_reqs() is
4568 necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004569 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004570
Lars Ellenberg08332d72012-08-17 15:09:13 +02004571 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4572 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004573 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004574
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004575 kfree(device->p_uuid);
4576 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004577
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004578 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004579 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004580
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004581 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004582
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004583 /* serialize with bitmap writeout triggered by the state change,
4584 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004585 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004586
Philipp Reisnerb411b362009-09-25 16:07:19 -07004587 /* tcp_close and release of sendpage pages can be deferred. I don't
4588 * want to use SO_LINGER, because apparently it can be deferred for
4589 * more than 20 seconds (longest time I checked).
4590 *
4591 * Actually we don't care for exactly when the network stack does its
4592 * put_page(), but release our reference on these pages right here.
4593 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004594 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004595 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004596 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004597 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004598 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004599 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004600 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004601 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004602 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004603
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004604 D_ASSERT(device, list_empty(&device->read_ee));
4605 D_ASSERT(device, list_empty(&device->active_ee));
4606 D_ASSERT(device, list_empty(&device->sync_ee));
4607 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004608
Philipp Reisner360cc742011-02-08 14:29:53 +01004609 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004610}
4611
4612/*
4613 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4614 * we can agree on is stored in agreed_pro_version.
4615 *
4616 * feature flags and the reserved array should be enough room for future
4617 * enhancements of the handshake protocol, and possible plugins...
4618 *
4619 * for now, they are expected to be zero, but ignored.
4620 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004621static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004622{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004623 struct drbd_socket *sock;
4624 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004626 sock = &connection->data;
4627 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004628 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004629 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004630 memset(p, 0, sizeof(*p));
4631 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4632 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004633 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004634}
4635
4636/*
4637 * return values:
4638 * 1 yes, we have a valid connection
4639 * 0 oops, did not work out, please try again
4640 * -1 peer talks different language,
4641 * no point in trying again, please go standalone.
4642 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004643static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004644{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004645 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004646 struct p_connection_features *p;
4647 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004648 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004649 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004650
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004651 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004652 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653 return 0;
4654
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004655 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004656 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004657 return 0;
4658
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004659 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004660 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004661 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004662 return -1;
4663 }
4664
Philipp Reisner77351055b2011-02-07 17:24:26 +01004665 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004666 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004667 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004668 return -1;
4669 }
4670
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004671 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004672 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004673 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004675
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676 p->protocol_min = be32_to_cpu(p->protocol_min);
4677 p->protocol_max = be32_to_cpu(p->protocol_max);
4678 if (p->protocol_max == 0)
4679 p->protocol_max = p->protocol_min;
4680
4681 if (PRO_VERSION_MAX < p->protocol_min ||
4682 PRO_VERSION_MIN > p->protocol_max)
4683 goto incompat;
4684
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004685 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004686
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004687 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004688 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004689
4690 return 1;
4691
4692 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004693 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004694 "I support %d-%d, peer supports %d-%d\n",
4695 PRO_VERSION_MIN, PRO_VERSION_MAX,
4696 p->protocol_min, p->protocol_max);
4697 return -1;
4698}
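
/*
 * Illustrative sketch (not DRBD code) of the overlap check in
 * drbd_do_features() above: two inclusive version ranges are
 * compatible iff they overlap, and the agreed version is the highest
 * common one. The concrete numbers are assumptions.
 */
#include <assert.h>

static int agree(int my_min, int my_max, int peer_min, int peer_max)
{
	if (my_max < peer_min || my_min > peer_max)
		return -1;		/* disjoint: incompatible */
	return my_max < peer_max ? my_max : peer_max;
}

int main(void)
{
	assert(agree(86, 101, 86, 96) == 96);	/* older peer: its max */
	assert(agree(86, 101, 100, 110) == 101);/* newer peer: our max */
	assert(agree(86, 96, 97, 101) == -1);	/* no overlap: standalone */
	return 0;
}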
4699
4700#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004701static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004702{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004703 drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
4704 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004705 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004706}
4707#else
4708#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004709
4710/* Return value:
4711 1 - auth succeeded,
4712 0 - failed, try again (network error),
4713 -1 - auth failed, don't try again.
4714*/
4715
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004716static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004718 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004719 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4720 struct scatterlist sg;
4721 char *response = NULL;
4722 char *right_response = NULL;
4723 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004724 unsigned int key_len;
4725 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004726 unsigned int resp_size;
4727 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004728 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004729 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004730 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004731
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004732 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4733
Philipp Reisner44ed1672011-04-19 17:10:19 +02004734 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004735 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02004736 key_len = strlen(nc->shared_secret);
4737 memcpy(secret, nc->shared_secret, key_len);
4738 rcu_read_unlock();
4739
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004740 desc.tfm = connection->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741 desc.flags = 0;
4742
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004743 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004744 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004745 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004746 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004747 goto fail;
4748 }
4749
4750 get_random_bytes(my_challenge, CHALLENGE_LEN);
4751
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004752 sock = &connection->data;
4753 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004754 rv = 0;
4755 goto fail;
4756 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004757 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004758 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004759 if (!rv)
4760 goto fail;
4761
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004762 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004763 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004764 rv = 0;
4765 goto fail;
4766 }
4767
Philipp Reisner77351055b2011-02-07 17:24:26 +01004768 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004769 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004770 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004771 rv = 0;
4772 goto fail;
4773 }
4774
Philipp Reisner77351055b2011-02-07 17:24:26 +01004775 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004776 drbd_err(connection, "AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004777 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004778 goto fail;
4779 }
4780
Philipp Reisner77351055b2011-02-07 17:24:26 +01004781 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004782 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004783 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004784 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004785 goto fail;
4786 }
4787
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004788 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004789 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004790 rv = 0;
4791 goto fail;
4792 }
4793
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004794 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004795 response = kmalloc(resp_size, GFP_NOIO);
4796 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004797 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004798 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004799 goto fail;
4800 }
4801
4802 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004803 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004804
4805 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4806 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004807 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004808 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004809 goto fail;
4810 }
4811
	if (!conn_prepare_command(connection, sock)) {
		rv = 0;
		goto fail;
	}
	rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
				response, resp_size);
	if (!rv)
		goto fail;

	err = drbd_recv_header(connection, &pi);
	if (err) {
		rv = 0;
		goto fail;
	}

	if (pi.cmd != P_AUTH_RESPONSE) {
		drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		rv = 0;
		goto fail;
	}

	if (pi.size != resp_size) {
		drbd_err(connection, "AuthResponse payload has wrong size\n");
		rv = 0;
		goto fail;
	}

	err = drbd_recv_all_warn(connection, response, resp_size);
	if (err) {
		rv = 0;
		goto fail;
	}

	right_response = kmalloc(resp_size, GFP_NOIO);
	if (right_response == NULL) {
		drbd_err(connection, "kmalloc of right_response failed\n");
		rv = -1;
		goto fail;
	}

	sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);

	rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
	if (rv) {
		drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
		rv = -1;
		goto fail;
	}

	rv = !memcmp(response, right_response, resp_size);

	if (rv)
		drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
			  resp_size);
	else
		rv = -1;

 fail:
	kfree(peers_ch);
	kfree(response);
	kfree(right_response);

	return rv;
}
#endif

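/*
 * Receiver thread main loop.  As handled below, conn_connect() returns
 * > 0 on success, 0 for "retry after a short sleep", and -1 for "give up
 * and discard the network configuration".
 */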
int drbd_receiver(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	int h;

	drbd_info(connection, "receiver (re)started\n");

	do {
		h = conn_connect(connection);
		if (h == 0) {
			conn_disconnect(connection);
			schedule_timeout_interruptible(HZ);
		}
		if (h == -1) {
			drbd_warn(connection, "Discarding network configuration.\n");
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
		}
	} while (h == 0);

	if (h > 0)
		drbdd(connection);

	conn_disconnect(connection);

	drbd_info(connection, "receiver terminated\n");
	return 0;
}

/* ********* acknowledge sender ******** */

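/*
 * The got_*() handlers below run in the asender thread on the meta
 * socket.  By convention they return 0 on success and a negative value
 * (-EIO) to force a reconnect, e.g. when the volume number in the packet
 * does not map to a known peer device.
 */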
static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	if (retcode >= SS_SUCCESS) {
		set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
	} else {
		set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
		drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&connection->ping_wait);

	return 0;
}

static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state_reply *p = pi->data;
	int retcode = be32_to_cpu(p->retcode);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
		D_ASSERT(device, connection->agreed_pro_version < 100);
		return got_conn_RqSReply(connection, pi);
	}

	if (retcode >= SS_SUCCESS) {
		set_bit(CL_ST_CHG_SUCCESS, &device->flags);
	} else {
		set_bit(CL_ST_CHG_FAIL, &device->flags);
		drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
			 drbd_set_st_err_str(retcode), retcode);
	}
	wake_up(&device->state_wait);

	return 0;
}

static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}

static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
{
	/* restore idle timeout */
	connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int * HZ;
	if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
		wake_up(&connection->ping_wait);

	return 0;
}

static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}

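/*
 * Look up the request identified by (id, sector) in the given tree and
 * apply the request state transition 'what' to it, all under req_lock.
 * If that transition completed the master bio, complete it outside the
 * lock.  With missing_ok, a request that is no longer in the tree is not
 * an error (used for protocol A, see got_NegAck()).
 */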
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}

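/*
 * got_BlockAck() handles all positive write acknowledgements.  Roughly:
 *
 *	P_RS_WRITE_ACK	-> WRITE_ACKED_BY_PEER_AND_SIS (also set in sync)
 *	P_WRITE_ACK	-> WRITE_ACKED_BY_PEER	(protocol C)
 *	P_RECV_ACK	-> RECV_ACKED_BY_PEER	(protocol B)
 *	P_SUPERSEDED	-> CONFLICT_RESOLVED
 *	P_RETRY_WRITE	-> POSTPONE_WRITE
 *
 * Acks for resync requests (block_id == ID_SYNCER) never had a
 * drbd_request; they only update the resync bookkeeping.
 */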
static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);
	enum drbd_req_event what;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		drbd_set_in_sync(device, sector, blksize);
		dec_rs_pending(device);
		return 0;
	}
	switch (pi->cmd) {
	case P_RS_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER_AND_SIS;
		break;
	case P_WRITE_ACK:
		what = WRITE_ACKED_BY_PEER;
		break;
	case P_RECV_ACK:
		what = RECV_ACKED_BY_PEER;
		break;
	case P_SUPERSEDED:
		what = CONFLICT_RESOLVED;
		break;
	case P_RETRY_WRITE:
		what = POSTPONE_WRITE;
		break;
	default:
		BUG();
	}

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->write_requests, __func__,
					     what, false);
}

static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int size = be32_to_cpu(p->blksize);
	int err;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (p->block_id == ID_SYNCER) {
		dec_rs_pending(device);
		drbd_rs_failed_io(device, sector, size);
		return 0;
	}

	err = validate_req_change_req_state(device, p->block_id, sector,
					    &device->write_requests, __func__,
					    NEG_ACKED, true);
	if (err) {
		/* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
		   The master bio might already be completed, therefore the
		   request is no longer in the write_requests tree. */
		/* In Protocol B we might already have got a P_RECV_ACK
		   but then get a P_NEG_ACK afterwards. */
		drbd_set_out_of_sync(device, sector, size);
	}
	return 0;
}

static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
		 (unsigned long long)sector, be32_to_cpu(p->blksize));

	return validate_req_change_req_state(device, p->block_id, sector,
					     &device->read_requests, __func__,
					     NEG_ACKED, false);
}

static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}

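/*
 * A barrier ack confirms the peer has written everything up to the given
 * epoch; tl_release() completes those requests.  It is also the trigger
 * for leaving Ahead mode: once no application writes are in flight, the
 * transition back to SyncSource is scheduled via the resync timer armed
 * below.
 */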
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}

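/*
 * Result of one online-verify request: the block_id encodes whether the
 * peer found the block out of sync.  When ov_left reaches zero, finishing
 * the verify run is handed off to the connection's sender work queue as
 * w_ov_finished, so the asender itself does not do the heavy lifting.
 */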
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			drbd_err(device, "kmalloc(dw) failed.\n");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}

static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}

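/*
 * Flush the done_ee lists of all volumes, sending the pending acks.
 * Note the kref/RCU dance: drbd_finish_peer_reqs() may sleep, so the
 * rcu read lock is dropped while it runs and the device is pinned by a
 * kref instead.  Loop until all done_ee lists stay empty.
 */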
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}

struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};

static struct asender_cmd asender_tbl[] = {
	[P_PING]	      = { 0, got_Ping },
	[P_PING_ACK]	      = { 0, got_PingAck },
	[P_RECV_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]	      = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	      = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	      = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]     = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	      = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	      = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY]   = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]     = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]	      = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]	      = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	      = { sizeof(struct p_block_ack), got_BlockAck },
};

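/*
 * Meta-socket framing, as enforced below: each packet is a header of
 * drbd_header_size(connection) bytes followed by the fixed-size payload
 * its asender_tbl entry declares.  The receive loop therefore first
 * reads a full header, looks up the command, extends 'expect' by the
 * command's pkt_size, and dispatches the handler once the payload is
 * complete.  Anything that does not fit this scheme forces a reconnect.
 */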
int drbd_asender(struct drbd_thread *thi)
{
	struct drbd_connection *connection = thi->connection;
	struct asender_cmd *cmd = NULL;
	struct packet_info pi;
	int rv;
	void *buf = connection->meta.rbuf;
	int received = 0;
	unsigned int header_size = drbd_header_size(connection);
	int expect = header_size;
	bool ping_timeout_active = false;
	struct net_conf *nc;
	int ping_timeo, tcp_cork, ping_int;
	struct sched_param param = { .sched_priority = 2 };

	rv = sched_setscheduler(current, SCHED_RR, &param);
	if (rv < 0)
		drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);

	while (get_t_state(thi) == RUNNING) {
		drbd_thread_current_set_cpu(thi);

		rcu_read_lock();
		nc = rcu_dereference(connection->net_conf);
		ping_timeo = nc->ping_timeo;
		tcp_cork = nc->tcp_cork;
		ping_int = nc->ping_int;
		rcu_read_unlock();

		if (test_and_clear_bit(SEND_PING, &connection->flags)) {
			if (drbd_send_ping(connection)) {
				drbd_err(connection, "drbd_send_ping has failed\n");
				goto reconnect;
			}
			connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
			ping_timeout_active = true;
		}

		/* TODO: conditionally cork; it may hurt latency if we cork without
		   much to send */
		if (tcp_cork)
			drbd_tcp_cork(connection->meta.socket);
		if (connection_finish_peer_reqs(connection)) {
			drbd_err(connection, "connection_finish_peer_reqs() failed\n");
			goto reconnect;
		}
		/* but unconditionally uncork unless disabled */
		if (tcp_cork)
			drbd_tcp_uncork(connection->meta.socket);

		/* short circuit, recv_msg would return EINTR anyways. */
		if (signal_pending(current))
			continue;

		rv = drbd_recv_short(connection->meta.socket, buf, expect - received, 0);
		clear_bit(SIGNAL_ASENDER, &connection->flags);

		flush_signals(current);

		/* Note:
		 * -EINTR	 (on meta) we got a signal
		 * -EAGAIN	 (on meta) rcvtimeo expired
		 * -ECONNRESET	 other side closed the connection
		 * -ERESTARTSYS	 (on data) we got a signal
		 * rv <  0	 other than above: unexpected error!
		 * rv == expected: full header or command
		 * rv <  expected: "woken" by signal during receive
		 * rv == 0	 : "connection shut down by peer"
		 */
		if (likely(rv > 0)) {
			received += rv;
			buf += rv;
		} else if (rv == 0) {
			if (test_bit(DISCONNECT_SENT, &connection->flags)) {
				long t;
				rcu_read_lock();
				t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
				rcu_read_unlock();

				t = wait_event_timeout(connection->ping_wait,
						       connection->cstate < C_WF_REPORT_PARAMS,
						       t);
				if (t)
					break;
			}
			drbd_err(connection, "meta connection shut down by peer.\n");
			goto reconnect;
		} else if (rv == -EAGAIN) {
			/* If the data socket received something meanwhile,
			 * that is good enough: peer is still alive. */
			if (time_after(connection->last_received,
				       jiffies - connection->meta.socket->sk->sk_rcvtimeo))
				continue;
			if (ping_timeout_active) {
				drbd_err(connection, "PingAck did not arrive in time.\n");
				goto reconnect;
			}
			set_bit(SEND_PING, &connection->flags);
			continue;
		} else if (rv == -EINTR) {
			continue;
		} else {
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
			goto reconnect;
		}

		if (received == expect && cmd == NULL) {
			if (decode_header(connection, connection->meta.rbuf, &pi))
				goto reconnect;
			/* bounds-check pi.cmd before indexing into the table */
			if (pi.cmd >= ARRAY_SIZE(asender_tbl) ||
			    !asender_tbl[pi.cmd].fn) {
				drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
					 cmdname(pi.cmd), pi.cmd);
				goto disconnect;
			}
			cmd = &asender_tbl[pi.cmd];
			expect = header_size + cmd->pkt_size;
			if (pi.size != expect - header_size) {
				drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
					 pi.cmd, pi.size);
				goto reconnect;
			}
		}
		if (received == expect) {
			int err;

			err = cmd->fn(connection, &pi);
			if (err) {
				drbd_err(connection, "%pf failed\n", cmd->fn);
				goto reconnect;
			}

			connection->last_received = jiffies;

			if (cmd == &asender_tbl[P_PING_ACK]) {
				/* restore idle timeout */
				connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
				ping_timeout_active = false;
			}

			buf = connection->meta.rbuf;
			received = 0;
			expect = header_size;
			cmd = NULL;
		}
	}

	if (0) {
reconnect:
		conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
		conn_md_sync(connection);
	}
	if (0) {
disconnect:
		conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}
	clear_bit(SIGNAL_ASENDER, &connection->flags);

	drbd_info(connection, "asender terminated\n");

	return 0;
}