blob: 5b17ec88ea058e766071e66eeadf3d8fca3f4940 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070049#include "drbd_vli.h"
50
Lars Ellenberg20c68fd2014-04-28 18:43:25 +020051#define PRO_FEATURES (FF_TRIM)
52
Philipp Reisner77351055b2011-02-07 17:24:26 +010053struct packet_info {
54 enum drbd_packet cmd;
Andreas Gruenbachere2857212011-03-25 00:57:38 +010055 unsigned int size;
56 unsigned int vnr;
Andreas Gruenbachere6589832011-03-30 12:54:42 +020057 void *data;
Philipp Reisner77351055b2011-02-07 17:24:26 +010058};
59
Philipp Reisnerb411b362009-09-25 16:07:19 -070060enum finish_epoch {
61 FE_STILL_LIVE,
62 FE_DESTROYED,
63 FE_RECYCLED,
64};
65
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020066static int drbd_do_features(struct drbd_connection *connection);
67static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020068static int drbd_disconnected(struct drbd_peer_device *);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +020069static void conn_wait_active_ee_empty(struct drbd_connection *connection);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020070static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010071static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
Philipp Reisnerb411b362009-09-25 16:07:19 -070073
74#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
75
Lars Ellenberg45bb9122010-05-14 17:10:48 +020076/*
77 * some helper functions to deal with single linked page lists,
78 * page->private being our "next" pointer.
79 */
80
81/* If at least n pages are linked at head, get n pages off.
82 * Otherwise, don't modify head, and return NULL.
83 * Locking is the responsibility of the caller.
84 */
85static struct page *page_chain_del(struct page **head, int n)
86{
87 struct page *page;
88 struct page *tmp;
89
90 BUG_ON(!n);
91 BUG_ON(!head);
92
93 page = *head;
Philipp Reisner23ce4222010-05-20 13:35:31 +020094
95 if (!page)
96 return NULL;
97
Lars Ellenberg45bb9122010-05-14 17:10:48 +020098 while (page) {
99 tmp = page_chain_next(page);
100 if (--n == 0)
101 break; /* found sufficient pages */
102 if (tmp == NULL)
103 /* insufficient pages, don't use any of them. */
104 return NULL;
105 page = tmp;
106 }
107
108 /* add end of list marker for the returned list */
109 set_page_private(page, 0);
110 /* actual return value, and adjustment of head */
111 page = *head;
112 *head = tmp;
113 return page;
114}
115
116/* may be used outside of locks to find the tail of a (usually short)
117 * "private" page chain, before adding it back to a global chain head
118 * with page_chain_add() under a spinlock. */
119static struct page *page_chain_tail(struct page *page, int *len)
120{
121 struct page *tmp;
122 int i = 1;
123 while ((tmp = page_chain_next(page)))
124 ++i, page = tmp;
125 if (len)
126 *len = i;
127 return page;
128}
129
130static int page_chain_free(struct page *page)
131{
132 struct page *tmp;
133 int i = 0;
134 page_chain_for_each_safe(page, tmp) {
135 put_page(page);
136 ++i;
137 }
138 return i;
139}
140
141static void page_chain_add(struct page **head,
142 struct page *chain_first, struct page *chain_last)
143{
144#if 1
145 struct page *tmp;
146 tmp = page_chain_tail(chain_first, NULL);
147 BUG_ON(tmp != chain_last);
148#endif
149
150 /* add chain to head */
151 set_page_private(chain_last, (unsigned long)*head);
152 *head = chain_first;
153}
154
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200155static struct page *__drbd_alloc_pages(struct drbd_device *device,
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200156 unsigned int number)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700157{
158 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200159 struct page *tmp = NULL;
Andreas Gruenbacher18c2d522011-04-07 21:08:50 +0200160 unsigned int i = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700161
162 /* Yes, testing drbd_pp_vacant outside the lock is racy.
163 * So what. It saves a spin_lock. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200164 if (drbd_pp_vacant >= number) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700165 spin_lock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200166 page = page_chain_del(&drbd_pp_pool, number);
167 if (page)
168 drbd_pp_vacant -= number;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700169 spin_unlock(&drbd_pp_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200170 if (page)
171 return page;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700172 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200173
Philipp Reisnerb411b362009-09-25 16:07:19 -0700174 /* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
175 * "criss-cross" setup, that might cause write-out on some other DRBD,
176 * which in turn might block on the other node at this very place. */
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200177 for (i = 0; i < number; i++) {
178 tmp = alloc_page(GFP_TRY);
179 if (!tmp)
180 break;
181 set_page_private(tmp, (unsigned long)page);
182 page = tmp;
183 }
184
185 if (i == number)
186 return page;
187
188 /* Not enough pages immediately available this time.
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200189 * No need to jump around here, drbd_alloc_pages will retry this
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200190 * function "soon". */
191 if (page) {
192 tmp = page_chain_tail(page, NULL);
193 spin_lock(&drbd_pp_lock);
194 page_chain_add(&drbd_pp_pool, page, tmp);
195 drbd_pp_vacant += i;
196 spin_unlock(&drbd_pp_lock);
197 }
198 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700199}
200
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200201static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200202 struct list_head *to_be_freed)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700203{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200204 struct drbd_peer_request *peer_req, *tmp;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700205
206 /* The EEs are always appended to the end of the list. Since
207 they are sent in order over the wire, they have to finish
208 in order. As soon as we see the first not finished we can
209 stop to examine the list... */
210
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200211 list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
Andreas Gruenbacher045417f2011-04-07 21:34:24 +0200212 if (drbd_peer_req_has_active_page(peer_req))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700213 break;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200214 list_move(&peer_req->w.list, to_be_freed);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700215 }
216}
217
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200218static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700219{
220 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100221 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700222
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200223 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200224 reclaim_finished_net_peer_reqs(device, &reclaimed);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200225 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700226
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200227 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200228 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700229}
230
231/**
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200233 * @device: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700236 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200237 * Tries to allocate number pages, first from our own page pool, then from
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200238 * the kernel.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Possibly retry until DRBD frees sufficient pages somewhere else.
240 *
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200241 * If this allocation would exceed the max_buffers setting, we throttle
242 * allocation (schedule_timeout) to give the system some room to breathe.
243 *
244 * We do not use max-buffers as hard limit, because it could lead to
245 * congestion and further to a distributed deadlock during online-verify or
246 * (checksum based) resync, if the max-buffers, socket buffer sizes and
247 * resync-rate settings are mis-configured.
248 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200249 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700250 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200251struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200252 bool retry)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700253{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200254 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700255 struct page *page = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200256 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700257 DEFINE_WAIT(wait);
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200258 unsigned int mxb;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700259
Philipp Reisner44ed1672011-04-19 17:10:19 +0200260 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200261 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200262 mxb = nc ? nc->max_buffers : 1000000;
263 rcu_read_unlock();
264
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200265 if (atomic_read(&device->pp_in_use) < mxb)
266 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700267
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200268 while (page == NULL) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700269 prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);
270
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200271 drbd_kick_lo_and_reclaim_net(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700272
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200273 if (atomic_read(&device->pp_in_use) < mxb) {
274 page = __drbd_alloc_pages(device, number);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700275 if (page)
276 break;
277 }
278
279 if (!retry)
280 break;
281
282 if (signal_pending(current)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200283 drbd_warn(device, "drbd_alloc_pages interrupted!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700284 break;
285 }
286
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200287 if (schedule_timeout(HZ/10) == 0)
288 mxb = UINT_MAX;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700289 }
290 finish_wait(&drbd_pp_wait, &wait);
291
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200292 if (page)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200293 atomic_add(number, &device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700294 return page;
295}
296
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200297/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200298 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200299 * Either links the page chain back to the global pool,
300 * or returns all pages to the system. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200301static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700302{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200303 atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700304 int i;
Lars Ellenberg435f0742010-09-06 12:30:25 +0200305
Lars Ellenberga73ff322012-06-25 19:15:38 +0200306 if (page == NULL)
307 return;
308
Philipp Reisner81a5d602011-02-22 19:53:16 -0500309 if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200310 i = page_chain_free(page);
311 else {
312 struct page *tmp;
313 tmp = page_chain_tail(page, &i);
314 spin_lock(&drbd_pp_lock);
315 page_chain_add(&drbd_pp_pool, page, tmp);
316 drbd_pp_vacant += i;
317 spin_unlock(&drbd_pp_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700318 }
Lars Ellenberg435f0742010-09-06 12:30:25 +0200319 i = atomic_sub_return(i, a);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200320 if (i < 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200321 drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
Lars Ellenberg435f0742010-09-06 12:30:25 +0200322 is_net ? "pp_in_use_by_net" : "pp_in_use", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700323 wake_up(&drbd_pp_wait);
324}
325
326/*
327You need to hold the req_lock:
328 _drbd_wait_ee_list_empty()
329
330You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200331 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200332 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200333 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700334 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200335 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700336 drbd_clear_done_ee()
337 drbd_wait_ee_list_empty()
338*/
339
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100340struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200341drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200342 unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700343{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200344 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100345 struct drbd_peer_request *peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +0200346 struct page *page = NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200347 unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700348
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200349 if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
Philipp Reisnerb411b362009-09-25 16:07:19 -0700350 return NULL;
351
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100352 peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
353 if (!peer_req) {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700354 if (!(gfp_mask & __GFP_NOWARN))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +0200355 drbd_err(device, "%s: allocation failed\n", __func__);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700356 return NULL;
357 }
358
Lars Ellenberga0fb3c42014-04-28 18:43:23 +0200359 if (has_payload && data_size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200360 page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
Lars Ellenberga73ff322012-06-25 19:15:38 +0200361 if (!page)
362 goto fail;
363 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700364
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100365 drbd_clear_interval(&peer_req->i);
366 peer_req->i.size = data_size;
367 peer_req->i.sector = sector;
368 peer_req->i.local = false;
369 peer_req->i.waiting = false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700370
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100371 peer_req->epoch = NULL;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200372 peer_req->peer_device = peer_device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100373 peer_req->pages = page;
374 atomic_set(&peer_req->pending_bios, 0);
375 peer_req->flags = 0;
Andreas Gruenbacher9a8e7752011-01-11 14:04:09 +0100376 /*
377 * The block_id is opaque to the receiver. It is not endianness
378 * converted, and sent back to the sender unchanged.
379 */
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100380 peer_req->block_id = id;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700381
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100382 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700383
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200384 fail:
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100385 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700386 return NULL;
387}
388
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200389void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +0100390 int is_net)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700391{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100392 if (peer_req->flags & EE_HAS_DIGEST)
393 kfree(peer_req->digest);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200394 drbd_free_pages(device, peer_req->pages, is_net);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +0200395 D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
396 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100397 mempool_free(peer_req, drbd_ee_mempool);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700398}
399
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200400int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700401{
402 LIST_HEAD(work_list);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100403 struct drbd_peer_request *peer_req, *t;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700404 int count = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200405 int is_net = list == &device->net_ee;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700406
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200407 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700408 list_splice_init(list, &work_list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200409 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700410
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200411 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200412 __drbd_free_peer_req(device, peer_req, is_net);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700413 count++;
414 }
415 return count;
416}
417
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418/*
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200419 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700420 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200421static int drbd_finish_peer_reqs(struct drbd_device *device)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700422{
423 LIST_HEAD(work_list);
424 LIST_HEAD(reclaimed);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +0100425 struct drbd_peer_request *peer_req, *t;
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100426 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700427
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200428 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200429 reclaim_finished_net_peer_reqs(device, &reclaimed);
430 list_splice_init(&device->done_ee, &work_list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200431 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700432
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200433 list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200434 drbd_free_net_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700435
436 /* possible callbacks here:
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +0200437 * e_end_block, and e_end_resync_block, e_send_superseded.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700438 * all ignore the last argument.
439 */
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200440 list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100441 int err2;
442
Philipp Reisnerb411b362009-09-25 16:07:19 -0700443 /* list_del not necessary, next/prev members not touched */
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +0200444 err2 = peer_req->w.cb(&peer_req->w, !!err);
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100445 if (!err)
446 err = err2;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200447 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700448 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200449 wake_up(&device->ee_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700450
Andreas Gruenbachere2b30322011-03-16 17:16:12 +0100451 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700452}
453
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200454static void _drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200455 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700456{
457 DEFINE_WAIT(wait);
458
459 /* avoids spin_lock/unlock
460 * and calling prepare_to_wait in the fast path */
461 while (!list_empty(head)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200462 prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200463 spin_unlock_irq(&device->resource->req_lock);
Jens Axboe7eaceac2011-03-10 08:52:07 +0100464 io_schedule();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200465 finish_wait(&device->ee_wait, &wait);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200466 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700467 }
468}
469
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200470static void drbd_wait_ee_list_empty(struct drbd_device *device,
Andreas Gruenbacherd4da1532011-04-07 00:06:56 +0200471 struct list_head *head)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700472{
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200473 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200474 _drbd_wait_ee_list_empty(device, head);
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200475 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700476}
477
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100478static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700479{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700480 struct kvec iov = {
481 .iov_base = buf,
482 .iov_len = size,
483 };
484 struct msghdr msg = {
Philipp Reisnerb411b362009-09-25 16:07:19 -0700485 .msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
486 };
Al Virof730c842014-02-08 21:07:38 -0500487 return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700488}
489
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200490static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700491{
Philipp Reisnerb411b362009-09-25 16:07:19 -0700492 int rv;
493
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200494 rv = drbd_recv_short(connection->data.socket, buf, size, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700495
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200496 if (rv < 0) {
497 if (rv == -ECONNRESET)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200498 drbd_info(connection, "sock was reset by peer\n");
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200499 else if (rv != -ERESTARTSYS)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200500 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerdbd08202012-08-17 16:55:47 +0200501 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200502 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200503 long t;
504 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200505 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200506 rcu_read_unlock();
507
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200508 t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);
Philipp Reisnerb66623e2012-08-08 21:19:09 +0200509
Philipp Reisner599377a2012-08-17 14:50:22 +0200510 if (t)
511 goto out;
512 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200513 drbd_info(connection, "sock was shut down by peer\n");
Philipp Reisner599377a2012-08-17 14:50:22 +0200514 }
515
Philipp Reisnerb411b362009-09-25 16:07:19 -0700516 if (rv != size)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200517 conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700518
Philipp Reisner599377a2012-08-17 14:50:22 +0200519out:
Philipp Reisnerb411b362009-09-25 16:07:19 -0700520 return rv;
521}
522
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200523static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100524{
525 int err;
526
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200527 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100528 if (err != size) {
529 if (err >= 0)
530 err = -EIO;
531 } else
532 err = 0;
533 return err;
534}
535
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200536static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100537{
538 int err;
539
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200540 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100541 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200542 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100543 return err;
544}
545
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200546/* quoting tcp(7):
547 * On individual connections, the socket buffer size must be set prior to the
548 * listen(2) or connect(2) calls in order to have it take effect.
549 * This is our wrapper to do so.
550 */
551static void drbd_setbufsize(struct socket *sock, unsigned int snd,
552 unsigned int rcv)
553{
554 /* open coded SO_SNDBUF, SO_RCVBUF */
555 if (snd) {
556 sock->sk->sk_sndbuf = snd;
557 sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
558 }
559 if (rcv) {
560 sock->sk->sk_rcvbuf = rcv;
561 sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
562 }
563}
564
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200565static struct socket *drbd_try_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700566{
567 const char *what;
568 struct socket *sock;
569 struct sockaddr_in6 src_in6;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200570 struct sockaddr_in6 peer_in6;
571 struct net_conf *nc;
572 int err, peer_addr_len, my_addr_len;
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200573 int sndbuf_size, rcvbuf_size, connect_int;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700574 int disconnect_on_error = 1;
575
Philipp Reisner44ed1672011-04-19 17:10:19 +0200576 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200577 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200578 if (!nc) {
579 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700580 return NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200581 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200582 sndbuf_size = nc->sndbuf_size;
583 rcvbuf_size = nc->rcvbuf_size;
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200584 connect_int = nc->connect_int;
Andreas Gruenbacher089c0752011-06-14 18:28:09 +0200585 rcu_read_unlock();
Philipp Reisner44ed1672011-04-19 17:10:19 +0200586
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200587 my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
588 memcpy(&src_in6, &connection->my_addr, my_addr_len);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200589
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200590 if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
Philipp Reisner44ed1672011-04-19 17:10:19 +0200591 src_in6.sin6_port = 0;
592 else
593 ((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */
594
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200595 peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
596 memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700597
598 what = "sock_create_kern";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200599 err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
600 SOCK_STREAM, IPPROTO_TCP, &sock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700601 if (err < 0) {
602 sock = NULL;
603 goto out;
604 }
605
606 sock->sk->sk_rcvtimeo =
Andreas Gruenbacher69ef82d2011-05-11 14:34:35 +0200607 sock->sk->sk_sndtimeo = connect_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200608 drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700609
610 /* explicitly bind to the configured IP as source IP
611 * for the outgoing connections.
612 * This is needed for multihomed hosts and to be
613 * able to use lo: interfaces for drbd.
614 * Make sure to use 0 as port number, so linux selects
615 * a free one dynamically.
616 */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700617 what = "bind before connect";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200618 err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700619 if (err < 0)
620 goto out;
621
622 /* connect may fail, peer not yet available.
623 * stay C_WF_CONNECTION, don't go Disconnecting! */
624 disconnect_on_error = 0;
625 what = "connect";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200626 err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700627
628out:
629 if (err < 0) {
630 if (sock) {
631 sock_release(sock);
632 sock = NULL;
633 }
634 switch (-err) {
635 /* timeout, busy, signal pending */
636 case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
637 case EINTR: case ERESTARTSYS:
638 /* peer not (yet) available, network problem */
639 case ECONNREFUSED: case ENETUNREACH:
640 case EHOSTDOWN: case EHOSTUNREACH:
641 disconnect_on_error = 0;
642 break;
643 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200644 drbd_err(connection, "%s failed, err = %d\n", what, err);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700645 }
646 if (disconnect_on_error)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200647 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700648 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200649
Philipp Reisnerb411b362009-09-25 16:07:19 -0700650 return sock;
651}
652
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200653struct accept_wait_data {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200654 struct drbd_connection *connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200655 struct socket *s_listen;
656 struct completion door_bell;
657 void (*original_sk_state_change)(struct sock *sk);
658
659};
660
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200661static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700662{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200663 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200664 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200665
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200666 state_change = ad->original_sk_state_change;
667 if (sk->sk_state == TCP_ESTABLISHED)
668 complete(&ad->door_bell);
669 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200670}
671
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200672static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700673{
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200674 int err, sndbuf_size, rcvbuf_size, my_addr_len;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200675 struct sockaddr_in6 my_addr;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200676 struct socket *s_listen;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200677 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700678 const char *what;
679
Philipp Reisner44ed1672011-04-19 17:10:19 +0200680 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200681 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200682 if (!nc) {
683 rcu_read_unlock();
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200684 return -EIO;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200685 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200686 sndbuf_size = nc->sndbuf_size;
687 rcvbuf_size = nc->rcvbuf_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200688 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700689
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200690 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
691 memcpy(&my_addr, &connection->my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700692
693 what = "sock_create_kern";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200694 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200695 SOCK_STREAM, IPPROTO_TCP, &s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700696 if (err) {
697 s_listen = NULL;
698 goto out;
699 }
700
Philipp Reisner98683652012-11-09 14:18:43 +0100701 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200702 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700703
704 what = "bind before listen";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200705 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706 if (err < 0)
707 goto out;
708
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200709 ad->s_listen = s_listen;
710 write_lock_bh(&s_listen->sk->sk_callback_lock);
711 ad->original_sk_state_change = s_listen->sk->sk_state_change;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200712 s_listen->sk->sk_state_change = drbd_incoming_connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200713 s_listen->sk->sk_user_data = ad;
714 write_unlock_bh(&s_listen->sk->sk_callback_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715
Philipp Reisner2820fd32012-07-12 10:22:48 +0200716 what = "listen";
717 err = s_listen->ops->listen(s_listen, 5);
718 if (err < 0)
719 goto out;
720
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200721 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700722out:
723 if (s_listen)
724 sock_release(s_listen);
725 if (err < 0) {
726 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200727 drbd_err(connection, "%s failed, err = %d\n", what, err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200728 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700729 }
730 }
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200731
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200732 return -EIO;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200733}
734
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200735static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
736{
737 write_lock_bh(&sk->sk_callback_lock);
738 sk->sk_state_change = ad->original_sk_state_change;
739 sk->sk_user_data = NULL;
740 write_unlock_bh(&sk->sk_callback_lock);
741}
742
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200743static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200744{
745 int timeo, connect_int, err = 0;
746 struct socket *s_estab = NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200747 struct net_conf *nc;
748
749 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200750 nc = rcu_dereference(connection->net_conf);
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200751 if (!nc) {
752 rcu_read_unlock();
753 return NULL;
754 }
755 connect_int = nc->connect_int;
756 rcu_read_unlock();
757
758 timeo = connect_int * HZ;
Akinobu Mita38b682b22013-04-29 16:21:31 -0700759 /* 28.5% random jitter */
760 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200761
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200762 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
763 if (err <= 0)
764 return NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200765
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200766 err = kernel_accept(ad->s_listen, &s_estab, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700767 if (err < 0) {
768 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200769 drbd_err(connection, "accept failed, err = %d\n", err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200770 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771 }
772 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700773
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200774 if (s_estab)
775 unregister_state_change(s_estab->sk, ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700776
777 return s_estab;
778}
779
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200780static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700781
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200783 enum drbd_packet cmd)
784{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200785 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200786 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200787 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700788}
789
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200790static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700791{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200792 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200793 struct packet_info pi;
794 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700795
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200796 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200797 if (err != header_size) {
798 if (err >= 0)
799 err = -EIO;
800 return err;
801 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200802 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200803 if (err)
804 return err;
805 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700806}
807
808/**
809 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700810 * @sock: pointer to the pointer to the socket.
811 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100812static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700813{
814 int rr;
815 char tb[4];
816
817 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100818 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700819
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100820 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700821
822 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100823 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700824 } else {
825 sock_release(*sock);
826 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100827 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828 }
829}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100830/* Gets called if a connection is established, or if a new minor gets created
831 in a connection */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200832int drbd_connected(struct drbd_peer_device *peer_device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100833{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200834 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100835 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100836
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200837 atomic_set(&device->packet_seq, 0);
838 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100839
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200840 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
841 &peer_device->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200842 &device->own_state_mutex;
Philipp Reisner8410da82011-02-11 20:11:10 +0100843
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200844 err = drbd_send_sync_param(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100845 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200846 err = drbd_send_sizes(peer_device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100847 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200848 err = drbd_send_uuids(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100849 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200850 err = drbd_send_current_state(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200851 clear_bit(USE_DEGR_WFC_T, &device->flags);
852 clear_bit(RESIZE_PENDING, &device->flags);
853 atomic_set(&device->ap_in_flight, 0);
854 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100855 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100856}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700857
858/*
859 * return values:
860 * 1 yes, we have a valid connection
861 * 0 oops, did not work out, please try again
862 * -1 peer talks different language,
863 * no point in trying again, please go standalone.
864 * -2 We do not have a network config...
865 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200866static int conn_connect(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700867{
Philipp Reisner7da35862011-12-19 22:42:56 +0100868 struct drbd_socket sock, msock;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +0200869 struct drbd_peer_device *peer_device;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200870 struct net_conf *nc;
Philipp Reisner92f14952012-08-01 11:41:01 +0200871 int vnr, timeout, h, ok;
Philipp Reisner08b165b2011-09-05 16:22:33 +0200872 bool discard_my_data;
Philipp Reisner197296f2012-03-26 16:47:11 +0200873 enum drbd_state_rv rv;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200874 struct accept_wait_data ad = {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200875 .connection = connection,
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200876 .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
877 };
Philipp Reisnerb411b362009-09-25 16:07:19 -0700878
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200879 clear_bit(DISCONNECT_SENT, &connection->flags);
880 if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700881 return -2;
882
Philipp Reisner7da35862011-12-19 22:42:56 +0100883 mutex_init(&sock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200884 sock.sbuf = connection->data.sbuf;
885 sock.rbuf = connection->data.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100886 sock.socket = NULL;
887 mutex_init(&msock.mutex);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200888 msock.sbuf = connection->meta.sbuf;
889 msock.rbuf = connection->meta.rbuf;
Philipp Reisner7da35862011-12-19 22:42:56 +0100890 msock.socket = NULL;
891
Andreas Gruenbacher0916e0e2011-03-21 14:10:15 +0100892 /* Assume that the peer only understands protocol 80 until we know better. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200893 connection->agreed_pro_version = 80;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700894
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200895 if (prepare_listen_socket(connection, &ad))
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200896 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700897
898 do {
Andreas Gruenbacher2bf89622011-03-28 16:33:12 +0200899 struct socket *s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700900
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200901 s = drbd_try_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700902 if (s) {
Philipp Reisner7da35862011-12-19 22:42:56 +0100903 if (!sock.socket) {
904 sock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200905 send_first_packet(connection, &sock, P_INITIAL_DATA);
Philipp Reisner7da35862011-12-19 22:42:56 +0100906 } else if (!msock.socket) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200907 clear_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100908 msock.socket = s;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200909 send_first_packet(connection, &msock, P_INITIAL_META);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700910 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200911 drbd_err(connection, "Logic error in conn_connect()\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700912 goto out_release_sockets;
913 }
914 }
915
Philipp Reisner7da35862011-12-19 22:42:56 +0100916 if (sock.socket && msock.socket) {
917 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200918 nc = rcu_dereference(connection->net_conf);
Philipp Reisner7da35862011-12-19 22:42:56 +0100919 timeout = nc->ping_timeo * HZ / 10;
920 rcu_read_unlock();
921 schedule_timeout_interruptible(timeout);
922 ok = drbd_socket_okay(&sock.socket);
923 ok = drbd_socket_okay(&msock.socket) && ok;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700924 if (ok)
925 break;
926 }
927
928retry:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200929 s = drbd_wait_for_connect(connection, &ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700930 if (s) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200931 int fp = receive_first_packet(connection, s);
Philipp Reisner7da35862011-12-19 22:42:56 +0100932 drbd_socket_okay(&sock.socket);
933 drbd_socket_okay(&msock.socket);
Philipp Reisner92f14952012-08-01 11:41:01 +0200934 switch (fp) {
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200935 case P_INITIAL_DATA:
Philipp Reisner7da35862011-12-19 22:42:56 +0100936 if (sock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200937 drbd_warn(connection, "initial packet S crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100938 sock_release(sock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200939 sock.socket = s;
940 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700941 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100942 sock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700943 break;
Andreas Gruenbachere5d6f332011-03-28 16:44:40 +0200944 case P_INITIAL_META:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200945 set_bit(RESOLVE_CONFLICTS, &connection->flags);
Philipp Reisner7da35862011-12-19 22:42:56 +0100946 if (msock.socket) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200947 drbd_warn(connection, "initial packet M crossed\n");
Philipp Reisner7da35862011-12-19 22:42:56 +0100948 sock_release(msock.socket);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200949 msock.socket = s;
950 goto randomize;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700951 }
Philipp Reisner7da35862011-12-19 22:42:56 +0100952 msock.socket = s;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700953 break;
954 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200955 drbd_warn(connection, "Error receiving initial packet\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -0700956 sock_release(s);
Philipp Reisner80c6eed2012-08-01 14:53:39 +0200957randomize:
Akinobu Mita38b682b22013-04-29 16:21:31 -0700958 if (prandom_u32() & 1)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700959 goto retry;
960 }
961 }
962
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200963 if (connection->cstate <= C_DISCONNECTING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700964 goto out_release_sockets;
965 if (signal_pending(current)) {
966 flush_signals(current);
967 smp_rmb();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200968 if (get_t_state(&connection->receiver) == EXITING)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700969 goto out_release_sockets;
970 }
971
Philipp Reisnerb666dbf2012-07-26 14:12:59 +0200972 ok = drbd_socket_okay(&sock.socket);
973 ok = drbd_socket_okay(&msock.socket) && ok;
974 } while (!ok);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700975
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200976 if (ad.s_listen)
977 sock_release(ad.s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700978
Philipp Reisner98683652012-11-09 14:18:43 +0100979 sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
980 msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisnerb411b362009-09-25 16:07:19 -0700981
Philipp Reisner7da35862011-12-19 22:42:56 +0100982 sock.socket->sk->sk_allocation = GFP_NOIO;
983 msock.socket->sk->sk_allocation = GFP_NOIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700984
Philipp Reisner7da35862011-12-19 22:42:56 +0100985 sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
986 msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700987
Philipp Reisnerb411b362009-09-25 16:07:19 -0700988 /* NOT YET ...
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200989 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
Philipp Reisner7da35862011-12-19 22:42:56 +0100990 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Andreas Gruenbacher60381782011-03-28 17:05:50 +0200991 * first set it to the P_CONNECTION_FEATURES timeout,
Philipp Reisnerb411b362009-09-25 16:07:19 -0700992 * which we set to 4x the configured ping_timeout. */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200993 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200994 nc = rcu_dereference(connection->net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700995
Philipp Reisner7da35862011-12-19 22:42:56 +0100996 sock.socket->sk->sk_sndtimeo =
997 sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200998
Philipp Reisner7da35862011-12-19 22:42:56 +0100999 msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001000 timeout = nc->timeout * HZ / 10;
Philipp Reisner08b165b2011-09-05 16:22:33 +02001001 discard_my_data = nc->discard_my_data;
Philipp Reisner44ed1672011-04-19 17:10:19 +02001002 rcu_read_unlock();
1003
Philipp Reisner7da35862011-12-19 22:42:56 +01001004 msock.socket->sk->sk_sndtimeo = timeout;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001005
1006 /* we don't want delays.
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001007 * we use TCP_CORK where appropriate, though */
Philipp Reisner7da35862011-12-19 22:42:56 +01001008 drbd_tcp_nodelay(sock.socket);
1009 drbd_tcp_nodelay(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001010
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001011 connection->data.socket = sock.socket;
1012 connection->meta.socket = msock.socket;
1013 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001014
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001015 h = drbd_do_features(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001016 if (h <= 0)
1017 return h;
1018
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001019 if (connection->cram_hmac_tfm) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001020 /* drbd_request_state(device, NS(conn, WFAuth)); */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001021 switch (drbd_do_auth(connection)) {
Johannes Thomab10d96c2010-01-07 16:02:50 +01001022 case -1:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001023 drbd_err(connection, "Authentication of peer failed\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001024 return -1;
Johannes Thomab10d96c2010-01-07 16:02:50 +01001025 case 0:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001026 drbd_err(connection, "Authentication of peer failed, trying again.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01001027 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001028 }
1029 }
1030
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001031 connection->data.socket->sk->sk_sndtimeo = timeout;
1032 connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001033
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001034 if (drbd_send_protocol(connection) == -EOPNOTSUPP)
Philipp Reisner7e2455c2010-04-22 14:50:23 +02001035 return -1;
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001036
Philipp Reisner31007742014-04-28 18:43:12 +02001037 /* Prevent a race between resync-handshake and
1038 * being promoted to Primary.
1039 *
1040 * Grab and release the state mutex, so we know that any current
1041 * drbd_set_role() is finished, and any incoming drbd_set_role
1042 * will see the STATE_SENT flag, and wait for it to be cleared.
1043 */
1044 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1045 mutex_lock(peer_device->device->state_mutex);
1046
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001047 set_bit(STATE_SENT, &connection->flags);
Philipp Reisner197296f2012-03-26 16:47:11 +02001048
Philipp Reisner31007742014-04-28 18:43:12 +02001049 idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
1050 mutex_unlock(peer_device->device->state_mutex);
1051
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001052 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001053 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1054 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001055 kref_get(&device->kref);
Andreas Gruenbacher26ea8f92013-06-25 16:50:03 +02001056 rcu_read_unlock();
1057
Philipp Reisner08b165b2011-09-05 16:22:33 +02001058 if (discard_my_data)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001059 set_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001060 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001061 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001062
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001063 drbd_connected(peer_device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001064 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02001065 rcu_read_lock();
1066 }
1067 rcu_read_unlock();
1068
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001069 rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
1070 if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
1071 clear_bit(STATE_SENT, &connection->flags);
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001072 return 0;
Philipp Reisnera1096a62012-04-06 12:07:34 +02001073 }
Philipp Reisner1e86ac42011-08-04 10:33:08 +02001074
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001075 drbd_thread_start(&connection->asender);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001076
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001077 mutex_lock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001078 /* The discard_my_data flag is a single-shot modifier to the next
1079 * connection attempt, the handshake of which is now well underway.
1080 * No need for rcu style copying of the whole struct
1081 * just to clear a single value. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001082 connection->net_conf->discard_my_data = 0;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001083 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner08b165b2011-09-05 16:22:33 +02001084
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07001085 return h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001086
1087out_release_sockets:
Philipp Reisner7a426fd2012-07-12 14:22:37 +02001088 if (ad.s_listen)
1089 sock_release(ad.s_listen);
Philipp Reisner7da35862011-12-19 22:42:56 +01001090 if (sock.socket)
1091 sock_release(sock.socket);
1092 if (msock.socket)
1093 sock_release(msock.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001094 return -1;
1095}
1096
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001097static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001099 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001101 if (header_size == sizeof(struct p_header100) &&
1102 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1103 struct p_header100 *h = header;
1104 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001105 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001106 return -EINVAL;
1107 }
1108 pi->vnr = be16_to_cpu(h->volume);
1109 pi->cmd = be16_to_cpu(h->command);
1110 pi->size = be32_to_cpu(h->length);
1111 } else if (header_size == sizeof(struct p_header95) &&
1112 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001114 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001115 pi->size = be32_to_cpu(h->length);
1116 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001117 } else if (header_size == sizeof(struct p_header80) &&
1118 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1119 struct p_header80 *h = header;
1120 pi->cmd = be16_to_cpu(h->command);
1121 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001122 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001123 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001124 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001125 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001126 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001127 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001128 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001129 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001130 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131}
1132
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001133static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001134{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001135 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001136 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001137
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001138 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001139 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001140 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001141
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001142 err = decode_header(connection, buffer, pi);
1143 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001145 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146}
1147
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001148static void drbd_flush(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001149{
1150 int rv;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001151 struct drbd_peer_device *peer_device;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001152 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001153
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001154 if (connection->write_ordering >= WO_bdev_flush) {
Lars Ellenberg615e0872011-11-17 14:32:12 +01001155 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001156 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1157 struct drbd_device *device = peer_device->device;
1158
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001159 if (!get_ldev(device))
Lars Ellenberg615e0872011-11-17 14:32:12 +01001160 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001161 kref_get(&device->kref);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001162 rcu_read_unlock();
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001163
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001164 rv = blkdev_issue_flush(device->ldev->backing_bdev,
Lars Ellenberg615e0872011-11-17 14:32:12 +01001165 GFP_NOIO, NULL);
1166 if (rv) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001167 drbd_info(device, "local disk flush failed with status %d\n", rv);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001168 /* would rather check on EOPNOTSUPP, but that is not reliable.
1169 * don't try again for ANY return value != 0
1170 * if (rv == -EOPNOTSUPP) */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001171 drbd_bump_write_ordering(connection, WO_drain_io);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001172 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001173 put_ldev(device);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001174 kref_put(&device->kref, drbd_destroy_device);
Lars Ellenberg615e0872011-11-17 14:32:12 +01001175
1176 rcu_read_lock();
1177 if (rv)
1178 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001179 }
Lars Ellenberg615e0872011-11-17 14:32:12 +01001180 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07001181 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001182}
1183
1184/**
1185 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001186 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 * @epoch: Epoch object.
1188 * @ev: Epoch event.
1189 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001190static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 struct drbd_epoch *epoch,
1192 enum epoch_event ev)
1193{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001194 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 enum finish_epoch rv = FE_STILL_LIVE;
1197
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001198 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 do {
1200 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201
1202 epoch_size = atomic_read(&epoch->epoch_size);
1203
1204 switch (ev & ~EV_CLEANUP) {
1205 case EV_PUT:
1206 atomic_dec(&epoch->active);
1207 break;
1208 case EV_GOT_BARRIER_NR:
1209 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 break;
1211 case EV_BECAME_LAST:
1212 /* nothing to do*/
1213 break;
1214 }
1215
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 if (epoch_size != 0 &&
1217 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001218 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001220 spin_unlock(&connection->epoch_lock);
1221 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1222 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001223 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001224#if 0
1225 /* FIXME: dec unacked on connection, once we have
1226 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001227 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001228 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001229#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001231 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1233 list_del(&epoch->list);
1234 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001235 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 kfree(epoch);
1237
1238 if (rv == FE_STILL_LIVE)
1239 rv = FE_DESTROYED;
1240 } else {
1241 epoch->flags = 0;
1242 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001243 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 if (rv == FE_STILL_LIVE)
1245 rv = FE_RECYCLED;
1246 }
1247 }
1248
1249 if (!next_epoch)
1250 break;
1251
1252 epoch = next_epoch;
1253 } while (1);
1254
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001255 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257 return rv;
1258}
1259
1260/**
1261 * drbd_bump_write_ordering() - Fall back to an other write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001262 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001263 * @wo: Write ordering method to try.
1264 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001265void drbd_bump_write_ordering(struct drbd_connection *connection, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001267 struct disk_conf *dc;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001268 struct drbd_peer_device *peer_device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001270 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271 static char *write_ordering_str[] = {
1272 [WO_none] = "none",
1273 [WO_drain_io] = "drain",
1274 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275 };
1276
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001277 pwo = connection->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001279 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001280 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1281 struct drbd_device *device = peer_device->device;
1282
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001283 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001284 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001285 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001286
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001287 if (wo == WO_bdev_flush && !dc->disk_flushes)
1288 wo = WO_drain_io;
1289 if (wo == WO_drain_io && !dc->disk_drain)
1290 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001291 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001292 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001293 rcu_read_unlock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001294 connection->write_ordering = wo;
1295 if (pwo != connection->write_ordering || wo == WO_bdev_flush)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001296 drbd_info(connection, "Method to ensure write ordering: %s\n", write_ordering_str[connection->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001297}
1298
1299/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001300 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001301 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001302 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001303 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001304 *
1305 * May spread the pages to multiple bios,
1306 * depending on bio_add_page restrictions.
1307 *
1308 * Returns 0 if all bios have been submitted,
1309 * -ENOMEM if we could not allocate enough bios,
1310 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1311 * single page to an empty bio (which should never happen and likely indicates
1312 * that the lower level IO stack is in some way broken). This has been observed
1313 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001314 */
1315/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001316int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001317 struct drbd_peer_request *peer_req,
1318 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001319{
1320 struct bio *bios = NULL;
1321 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001322 struct page *page = peer_req->pages;
1323 sector_t sector = peer_req->i.sector;
1324 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001325 unsigned n_bios = 0;
1326 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001327 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001328
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001329 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1330 /* wait for all pending IO completions, before we start
1331 * zeroing things out. */
1332 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1333 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1334 sector, ds >> 9, GFP_NOIO))
1335 peer_req->flags |= EE_WAS_ERROR;
1336 drbd_endio_write_sec_final(peer_req);
1337 return 0;
1338 }
1339
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001340 /* Discards don't have any payload.
1341 * But the scsi layer still expects a bio_vec it can use internally,
1342 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001343 if (peer_req->flags & EE_IS_TRIM)
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001344 nr_pages = 1;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001345
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001346 /* In most cases, we will only need one bio. But in case the lower
1347 * level restrictions happen to be different at this offset on this
1348 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001349 * request in more than one bio.
1350 *
1351 * Plain bio_alloc is good enough here, this is no DRBD internally
1352 * generated bio, but a bio allocated on behalf of the peer.
1353 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001354next_bio:
1355 bio = bio_alloc(GFP_NOIO, nr_pages);
1356 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001357 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001358 goto fail;
1359 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001360 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001361 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001362 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001363 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001364 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001365 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001366
1367 bio->bi_next = bios;
1368 bios = bio;
1369 ++n_bios;
1370
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001371 if (rw & REQ_DISCARD) {
1372 bio->bi_iter.bi_size = ds;
1373 goto submit;
1374 }
1375
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001376 page_chain_for_each(page) {
1377 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1378 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001379 /* A single page must always be possible!
1380 * But in case it fails anyways,
1381 * we deal with it, and complain (below). */
1382 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001383 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001384 "bio_add_page failed for len=%u, "
1385 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001386 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001387 err = -ENOSPC;
1388 goto fail;
1389 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001390 goto next_bio;
1391 }
1392 ds -= len;
1393 sector += len >> 9;
1394 --nr_pages;
1395 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001396 D_ASSERT(device, ds == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001397submit:
1398 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001399
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001400 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001401 do {
1402 bio = bios;
1403 bios = bios->bi_next;
1404 bio->bi_next = NULL;
1405
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001406 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001407 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001408 return 0;
1409
1410fail:
1411 while (bios) {
1412 bio = bios;
1413 bios = bios->bi_next;
1414 bio_put(bio);
1415 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001416 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001417}
1418
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001419static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001420 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001421{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001422 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001423
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001424 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001425 drbd_clear_interval(i);
1426
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001427 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001428 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001429 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001430}
1431
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001432static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001433{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001434 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001435 int vnr;
1436
1437 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001438 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1439 struct drbd_device *device = peer_device->device;
1440
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001441 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001442 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001443 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001444 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001445 rcu_read_lock();
1446 }
1447 rcu_read_unlock();
1448}
1449
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001450static struct drbd_peer_device *
1451conn_peer_device(struct drbd_connection *connection, int volume_number)
1452{
1453 return idr_find(&connection->peer_devices, volume_number);
1454}
1455
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001456static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001457{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001458 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001459 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001460 struct drbd_epoch *epoch;
1461
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001462 /* FIXME these are unacked on connection,
1463 * not a specific (peer)device.
1464 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001465 connection->current_epoch->barrier_nr = p->barrier;
1466 connection->current_epoch->connection = connection;
1467 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001468
1469 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1470 * the activity log, which means it would not be resynced in case the
1471 * R_PRIMARY crashes now.
1472 * Therefore we must send the barrier_ack after the barrier request was
1473 * completed. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001474 switch (connection->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001475 case WO_none:
1476 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001477 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001478
1479 /* receiver context, in the writeout path of the other node.
1480 * avoid potential distributed deadlock */
1481 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1482 if (epoch)
1483 break;
1484 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001485 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001486 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001487
1488 case WO_bdev_flush:
1489 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001490 conn_wait_active_ee_empty(connection);
1491 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001492
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001493 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001494 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1495 if (epoch)
1496 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001497 }
1498
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001499 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001500 default:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001501 drbd_err(connection, "Strangeness in connection->write_ordering %d\n", connection->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001502 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001503 }
1504
1505 epoch->flags = 0;
1506 atomic_set(&epoch->epoch_size, 0);
1507 atomic_set(&epoch->active, 0);
1508
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001509 spin_lock(&connection->epoch_lock);
1510 if (atomic_read(&connection->current_epoch->epoch_size)) {
1511 list_add(&epoch->list, &connection->current_epoch->list);
1512 connection->current_epoch = epoch;
1513 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001514 } else {
1515 /* The current_epoch got recycled while we allocated this one... */
1516 kfree(epoch);
1517 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001518 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001519
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001520 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001521}
1522
1523/* used from receive_RSDataReply (recv_resync_read)
1524 * and from receive_Data */
Andreas Gruenbacherf6ffca92011-02-04 15:30:34 +01001525static struct drbd_peer_request *
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001526read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001527 struct packet_info *pi) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001528{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001529 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001530 const sector_t capacity = drbd_get_capacity(device->this_bdev);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001531 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001532 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001533 int dgs, ds, err;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001534 int data_size = pi->size;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001535 void *dig_in = peer_device->connection->int_dig_in;
1536 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001537 unsigned long *data;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001538 struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001539
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001540 dgs = 0;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001541 if (!trim && peer_device->connection->peer_integrity_tfm) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001542 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02001543 /*
1544 * FIXME: Receive the incoming digest into the receive buffer
1545 * here, together with its struct p_data?
1546 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001547 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001548 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001549 return NULL;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001550 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001551 }
1552
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001553 if (trim) {
1554 D_ASSERT(peer_device, data_size == 0);
1555 data_size = be32_to_cpu(trim->size);
1556 }
1557
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001558 if (!expect(IS_ALIGNED(data_size, 512)))
1559 return NULL;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001560 /* prepare for larger trim requests. */
1561 if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01001562 return NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563
Lars Ellenberg66660322010-04-06 12:15:04 +02001564 /* even though we trust out peer,
1565 * we sometimes have to double check. */
1566 if (sector + (data_size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001567 drbd_err(device, "request from peer beyond end of local disk: "
Lars Ellenbergfdda6542011-01-24 15:11:01 +01001568 "capacity: %llus < sector: %llus + size: %u\n",
Lars Ellenberg66660322010-04-06 12:15:04 +02001569 (unsigned long long)capacity,
1570 (unsigned long long)sector, data_size);
1571 return NULL;
1572 }
1573
Philipp Reisnerb411b362009-09-25 16:07:19 -07001574 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
1575 * "criss-cross" setup, that might cause write-out on some other DRBD,
1576 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001577 peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001578 if (!peer_req)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001579 return NULL;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001580
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001581 if (trim)
Lars Ellenberg81a35372012-07-30 09:00:54 +02001582 return peer_req;
Lars Ellenberga73ff322012-06-25 19:15:38 +02001583
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584 ds = data_size;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001585 page = peer_req->pages;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001586 page_chain_for_each(page) {
1587 unsigned len = min_t(int, ds, PAGE_SIZE);
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001588 data = kmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001589 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001590 if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001591 drbd_err(device, "Fault injection: Corrupting data on receive\n");
Philipp Reisner6b4388a2010-04-26 14:11:45 +02001592 data[0] = data[0] ^ (unsigned long)-1;
1593 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07001594 kunmap(page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001595 if (err) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001596 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001597 return NULL;
1598 }
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001599 ds -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001600 }
1601
1602 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001603 drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001604 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001605 drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
Lars Ellenberg470be442010-11-10 10:36:52 +01001606 (unsigned long long)sector, data_size);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001607 drbd_free_peer_req(device, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001608 return NULL;
1609 }
1610 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001611 device->recv_cnt += data_size>>9;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001612 return peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001613}
1614
1615/* drbd_drain_block() just takes a data block
1616 * out of the socket input buffer, and discards it.
1617 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001618static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001619{
1620 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001621 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001622 void *data;
1623
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001624 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001625 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001626
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001627 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001628
1629 data = kmap(page);
1630 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001631 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1632
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001633 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001634 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001635 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001636 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001637 }
1638 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001639 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001640 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001641}
1642
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001643static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001644 sector_t sector, int data_size)
1645{
Kent Overstreet79886132013-11-23 17:19:00 -08001646 struct bio_vec bvec;
1647 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001648 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001649 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001650 void *dig_in = peer_device->connection->int_dig_in;
1651 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001652
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001653 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001654 if (peer_device->connection->peer_integrity_tfm) {
1655 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1656 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001657 if (err)
1658 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001659 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001660 }
1661
Philipp Reisnerb411b362009-09-25 16:07:19 -07001662 /* optimistically update recv_cnt. if receiving fails below,
1663 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001664 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001665
1666 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001667 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001668
Kent Overstreet79886132013-11-23 17:19:00 -08001669 bio_for_each_segment(bvec, bio, iter) {
1670 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1671 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001672 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001673 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001674 if (err)
1675 return err;
1676 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001677 }
1678
1679 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001680 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001681 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001682 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001683 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001684 }
1685 }
1686
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001687 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001688 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001689}
1690
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001691/*
1692 * e_end_resync_block() is called in asender context via
1693 * drbd_finish_peer_reqs().
1694 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001695static int e_end_resync_block(struct drbd_work *w, int unused)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001696{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001697 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001698 container_of(w, struct drbd_peer_request, w);
1699 struct drbd_peer_device *peer_device = peer_req->peer_device;
1700 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001701 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001702 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001703
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001704 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001705
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001706 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001707 drbd_set_in_sync(device, sector, peer_req->i.size);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001708 err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001709 } else {
1710 /* Record failure to sync */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001711 drbd_rs_failed_io(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001712
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001713 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001714 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001715 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001716
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001717 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001718}
1719
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001720static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001721 struct packet_info *pi) __releases(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001722{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001723 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001724 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001725
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001726 peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001727 if (!peer_req)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001728 goto fail;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001729
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001730 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001731
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001732 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001733 /* corresponding dec_unacked() in e_end_resync_block()
1734 * respective _drbd_clear_done_ee */
1735
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001736 peer_req->w.cb = e_end_resync_block;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001737
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001738 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001739 list_add(&peer_req->w.list, &device->sync_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001740 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001741
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001742 atomic_add(pi->size >> 9, &device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001743 if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001744 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001745
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001746 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001747 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001748 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001749 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001750 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02001751
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001752 drbd_free_peer_req(device, peer_req);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001753fail:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001754 put_ldev(device);
Andreas Gruenbachere1c1b0f2011-03-16 17:58:27 +01001755 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001756}
1757
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001758static struct drbd_request *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001759find_request(struct drbd_device *device, struct rb_root *root, u64 id,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001760 sector_t sector, bool missing_ok, const char *func)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001761{
1762 struct drbd_request *req;
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001763
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01001764 /* Request object according to our peer */
1765 req = (struct drbd_request *)(unsigned long)id;
Andreas Gruenbacher5e472262011-01-27 14:42:51 +01001766 if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001767 return req;
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001768 if (!missing_ok) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001769 drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001770 (unsigned long)id, (unsigned long long)sector);
1771 }
Andreas Gruenbacher668eebc2011-01-20 17:14:26 +01001772 return NULL;
1773}
1774
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001775static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001776{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001777 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001778 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001779 struct drbd_request *req;
1780 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001781 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001782 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001783
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001784 peer_device = conn_peer_device(connection, pi->vnr);
1785 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001786 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001787 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788
1789 sector = be64_to_cpu(p->sector);
1790
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001791 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001792 req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001793 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01001794 if (unlikely(!req))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001795 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001796
Bart Van Assche24c48302011-05-21 18:32:29 +02001797 /* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
Philipp Reisnerb411b362009-09-25 16:07:19 -07001798 * special casing it there for the various failure cases.
1799 * still no race with drbd_fail_pending_reads */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001800 err = recv_dless_read(peer_device, req, sector, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001801 if (!err)
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01001802 req_mod(req, DATA_RECEIVED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001803 /* else: nothing. handled from drbd_disconnect...
1804 * I don't think we may complete this just yet
1805 * in case we are "on-disconnect: freeze" */
1806
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001807 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001808}
1809
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001810static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001811{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001812 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001813 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001814 sector_t sector;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001815 int err;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001816 struct p_data *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001817
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001818 peer_device = conn_peer_device(connection, pi->vnr);
1819 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01001820 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001821 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001822
1823 sector = be64_to_cpu(p->sector);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001824 D_ASSERT(device, p->block_id == ID_SYNCER);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001826 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001827 /* data is submitted to disk within recv_resync_read.
1828 * corresponding put_ldev done below on error,
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001829 * or in drbd_peer_request_endio. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001830 err = recv_resync_read(peer_device, sector, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001831 } else {
1832 if (__ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001833 drbd_err(device, "Can not write resync data to local disk.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07001834
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001835 err = drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001836
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001837 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001838 }
1839
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001840 atomic_add(pi->size >> 9, &device->rs_sect_in);
Philipp Reisner778f2712010-07-06 11:14:00 +02001841
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001842 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001843}
1844
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001845static void restart_conflicting_writes(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001846 sector_t sector, int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001847{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001848 struct drbd_interval *i;
1849 struct drbd_request *req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001850
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001851 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001852 if (!i->local)
1853 continue;
1854 req = container_of(i, struct drbd_request, i);
1855 if (req->rq_state & RQ_LOCAL_PENDING ||
1856 !(req->rq_state & RQ_POSTPONED))
1857 continue;
Lars Ellenberg2312f0b32011-11-24 10:36:25 +01001858 /* as it is RQ_POSTPONED, this will cause it to
1859 * be queued on the retry workqueue. */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001860 __req_mod(req, CONFLICT_RESOLVED, NULL);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001861 }
1862}
1863
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001864/*
1865 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001866 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001867static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001868{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001869 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001870 container_of(w, struct drbd_peer_request, w);
1871 struct drbd_peer_device *peer_device = peer_req->peer_device;
1872 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001873 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001874 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001875
Philipp Reisner303d1442011-04-13 16:24:47 -07001876 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001877 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001878 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1879 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001880 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001881 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001882 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001883 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001884 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001885 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001886 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001887 /* we expect it to be marked out of sync anyways...
1888 * maybe assert this? */
1889 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001890 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001891 }
1892 /* we delete from the conflict detection hash _after_ we sent out the
1893 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001894 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001895 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001896 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001897 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001898 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001899 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001900 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001901 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001902 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001903
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001904 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001905
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001906 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001907}
1908
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001909static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001910{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001911 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001912 container_of(w, struct drbd_peer_request, w);
1913 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001914 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001915
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001916 err = drbd_send_ack(peer_device, ack, peer_req);
1917 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001918
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001919 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001920}
1921
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001922static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001923{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001924 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001925}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001926
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001927static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001928{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001929 struct drbd_peer_request *peer_req =
1930 container_of(w, struct drbd_peer_request, w);
1931 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001932
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001933 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001934 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001935}
1936
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001937static bool seq_greater(u32 a, u32 b)
1938{
1939 /*
1940 * We assume 32-bit wrap-around here.
1941 * For 24-bit wrap-around, we would have to shift:
1942 * a <<= 8; b <<= 8;
1943 */
1944 return (s32)a - (s32)b > 0;
1945}
1946
1947static u32 seq_max(u32 a, u32 b)
1948{
1949 return seq_greater(a, b) ? a : b;
1950}
1951
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001952static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001953{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001954 struct drbd_device *device = peer_device->device;
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001955 unsigned int newest_peer_seq;
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001956
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001957 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001958 spin_lock(&device->peer_seq_lock);
1959 newest_peer_seq = seq_max(device->peer_seq, peer_seq);
1960 device->peer_seq = newest_peer_seq;
1961 spin_unlock(&device->peer_seq_lock);
1962 /* wake up only if we actually changed device->peer_seq */
Lars Ellenberg3c13b682011-02-23 16:10:01 +01001963 if (peer_seq == newest_peer_seq)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001964 wake_up(&device->seq_wait);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001965 }
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001966}
1967
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001968static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1969{
1970 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1971}
1972
1973/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001974static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001975{
1976 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001977 bool rv = 0;
1978
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001979 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001980 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001981 if (overlaps(peer_req->i.sector, peer_req->i.size,
1982 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001983 rv = 1;
1984 break;
1985 }
1986 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001987 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001988
1989 return rv;
1990}
1991
Philipp Reisnerb411b362009-09-25 16:07:19 -07001992/* Called from receive_Data.
1993 * Synchronize packets on sock with packets on msock.
1994 *
1995 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1996 * packet traveling on msock, they are still processed in the order they have
1997 * been sent.
1998 *
1999 * Note: we don't care for Ack packets overtaking P_DATA packets.
2000 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002001 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002002 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002003 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002004 * ourselves. Correctly handles 32bit wrap around.
2005 *
2006 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2007 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2008 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2009 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2010 *
2011 * returns 0 if we may process the packet,
2012 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002013static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002014{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002015 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002016 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002017 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002018 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002019
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002020 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002021 return 0;
2022
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002023 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002024 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002025 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2026 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002027 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002028 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002029
Philipp Reisnerb411b362009-09-25 16:07:19 -07002030 if (signal_pending(current)) {
2031 ret = -ERESTARTSYS;
2032 break;
2033 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002034
2035 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002036 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002037 rcu_read_unlock();
2038
2039 if (!tp)
2040 break;
2041
2042 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002043 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2044 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002045 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002046 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002047 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002048 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002049 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002050 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002051 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002052 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002053 break;
2054 }
2055 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002056 spin_unlock(&device->peer_seq_lock);
2057 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002058 return ret;
2059}
2060
Lars Ellenberg688593c2010-11-17 22:25:03 +01002061/* see also bio_flags_to_wire()
2062 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2063 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002064static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002065{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002066 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2067 (dpf & DP_FUA ? REQ_FUA : 0) |
2068 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2069 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002070}
2071
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002072static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002073 unsigned int size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002074{
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002075 struct drbd_interval *i;
2076
2077 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002078 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002079 struct drbd_request *req;
2080 struct bio_and_error m;
2081
2082 if (!i->local)
2083 continue;
2084 req = container_of(i, struct drbd_request, i);
2085 if (!(req->rq_state & RQ_POSTPONED))
2086 continue;
2087 req->rq_state &= ~RQ_POSTPONED;
2088 __req_mod(req, NEG_ACKED, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002089 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002090 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002091 complete_master_bio(device, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002092 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002093 goto repeat;
2094 }
2095}
2096
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002097static int handle_write_conflicts(struct drbd_device *device,
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002098 struct drbd_peer_request *peer_req)
2099{
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002100 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002101 bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002102 sector_t sector = peer_req->i.sector;
2103 const unsigned int size = peer_req->i.size;
2104 struct drbd_interval *i;
2105 bool equal;
2106 int err;
2107
2108 /*
2109 * Inserting the peer request into the write_requests tree will prevent
2110 * new conflicting local requests from being added.
2111 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002112 drbd_insert_interval(&device->write_requests, &peer_req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002113
2114 repeat:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002115 drbd_for_each_overlap(i, &device->write_requests, sector, size) {
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002116 if (i == &peer_req->i)
2117 continue;
2118
2119 if (!i->local) {
2120 /*
2121 * Our peer has sent a conflicting remote request; this
2122 * should not happen in a two-node setup. Wait for the
2123 * earlier peer request to complete.
2124 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002125 err = drbd_wait_misc(device, i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002126 if (err)
2127 goto out;
2128 goto repeat;
2129 }
2130
2131 equal = i->sector == sector && i->size == size;
2132 if (resolve_conflicts) {
2133 /*
2134 * If the peer request is fully contained within the
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002135 * overlapping request, it can be considered overwritten
2136 * and thus superseded; otherwise, it will be retried
2137 * once all overlapping requests have completed.
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002138 */
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002139 bool superseded = i->sector <= sector && i->sector +
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002140 (i->size >> 9) >= sector + (size >> 9);
2141
2142 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002143 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002144 "local=%llus +%u, remote=%llus +%u, "
2145 "assuming %s came first\n",
2146 (unsigned long long)i->sector, i->size,
2147 (unsigned long long)sector, size,
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002148 superseded ? "local" : "remote");
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002149
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002150 inc_unacked(device);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002151 peer_req->w.cb = superseded ? e_send_superseded :
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002152 e_send_retry_write;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002153 list_add_tail(&peer_req->w.list, &device->done_ee);
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002154 wake_asender(connection);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002155
2156 err = -ENOENT;
2157 goto out;
2158 } else {
2159 struct drbd_request *req =
2160 container_of(i, struct drbd_request, i);
2161
2162 if (!equal)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002163 drbd_alert(device, "Concurrent writes detected: "
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002164 "local=%llus +%u, remote=%llus +%u\n",
2165 (unsigned long long)i->sector, i->size,
2166 (unsigned long long)sector, size);
2167
2168 if (req->rq_state & RQ_LOCAL_PENDING ||
2169 !(req->rq_state & RQ_POSTPONED)) {
2170 /*
2171 * Wait for the node with the discard flag to
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02002172 * decide if this request has been superseded
2173 * or needs to be retried.
2174 * Requests that have been superseded will
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002175 * disappear from the write_requests tree.
2176 *
2177 * In addition, wait for the conflicting
2178 * request to finish locally before submitting
2179 * the conflicting peer request.
2180 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002181 err = drbd_wait_misc(device, &req->i);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002182 if (err) {
Andreas Gruenbachere33b32d2011-08-30 15:38:04 +02002183 _conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002184 fail_postponed_requests(device, sector, size);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002185 goto out;
2186 }
2187 goto repeat;
2188 }
2189 /*
2190 * Remember to restart the conflicting requests after
2191 * the new peer request has completed.
2192 */
2193 peer_req->flags |= EE_RESTART_REQUESTS;
2194 }
2195 }
2196 err = 0;
2197
2198 out:
2199 if (err)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002200 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002201 return err;
2202}
2203
Philipp Reisnerb411b362009-09-25 16:07:19 -07002204/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002205static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002206{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002207 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002208 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002209 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002210 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002211 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002212 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002213 int rw = WRITE;
2214 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002215 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002216
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002217 peer_device = conn_peer_device(connection, pi->vnr);
2218 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002219 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002220 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002221
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002222 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002223 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002224
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002225 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2226 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002227 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002228 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002229 if (!err)
2230 err = err2;
2231 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002232 }
2233
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002234 /*
2235 * Corresponding put_ldev done either below (on various errors), or in
2236 * drbd_peer_request_endio, if we successfully submit the data at the
2237 * end of this function.
2238 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002239
2240 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002241 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002242 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002243 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002244 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002245 }
2246
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002247 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002248
Lars Ellenberg688593c2010-11-17 22:25:03 +01002249 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002250 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002251 if (pi->cmd == P_TRIM) {
2252 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2253 peer_req->flags |= EE_IS_TRIM;
2254 if (!blk_queue_discard(q))
2255 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2256 D_ASSERT(peer_device, peer_req->i.size > 0);
2257 D_ASSERT(peer_device, rw & REQ_DISCARD);
2258 D_ASSERT(peer_device, peer_req->pages == NULL);
2259 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002260 D_ASSERT(device, peer_req->i.size == 0);
2261 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002262 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002263
2264 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002265 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002266
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002267 spin_lock(&connection->epoch_lock);
2268 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002269 atomic_inc(&peer_req->epoch->epoch_size);
2270 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002271 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002272
Philipp Reisner302bdea2011-04-21 11:36:49 +02002273 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002274 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002275 rcu_read_unlock();
2276 if (tp) {
2277 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002278 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002279 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002280 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002281 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002282 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002283 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002284 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002285 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002286 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002287 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002288 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002289 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002290 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002291 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002292 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002293 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002294 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002295 /* if we use the zeroout fallback code, we process synchronously
2296 * and we wait for all pending requests, respectively wait for
2297 * active_ee to become empty in drbd_submit_peer_request();
2298 * better not add ourselves here. */
2299 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2300 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002301 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002302
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002303 if (device->state.conn == C_SYNC_TARGET)
2304 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002305
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002306 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002307 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002308 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002309 case DRBD_PROT_C:
2310 dp_flags |= DP_SEND_WRITE_ACK;
2311 break;
2312 case DRBD_PROT_B:
2313 dp_flags |= DP_SEND_RECEIVE_ACK;
2314 break;
2315 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002316 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002317 }
2318
2319 if (dp_flags & DP_SEND_WRITE_ACK) {
2320 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002321 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002322 /* corresponding dec_unacked() in e_end_block()
2323 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002324 }
2325
2326 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002327 /* I really don't like it that the receiver thread
2328 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002329 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002330 }
2331
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002332 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002333 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002334 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002335 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2336 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002337 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002338 }
2339
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002340 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002341 if (!err)
2342 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002343
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002344 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002345 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002346 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002347 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002348 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002349 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002350 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002351 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002352
Philipp Reisnerb411b362009-09-25 16:07:19 -07002353out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002354 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002355 put_ldev(device);
2356 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002357 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002358}
2359
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002360/* We may throttle resync, if the lower device seems to be busy,
2361 * and current sync rate is above c_min_rate.
2362 *
2363 * To decide whether or not the lower device is busy, we use a scheme similar
2364 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2365 * (more than 64 sectors) of activity we cannot account for with our own resync
2366 * activity, it obviously is "busy".
2367 *
2368 * The current sync rate used here uses only the most recent two step marks,
2369 * to have a short time average so we can react faster.
2370 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002371bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2372{
2373 struct lc_element *tmp;
2374 bool throttle = true;
2375
2376 if (!drbd_rs_c_min_rate_throttle(device))
2377 return false;
2378
2379 spin_lock_irq(&device->al_lock);
2380 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2381 if (tmp) {
2382 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2383 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2384 throttle = false;
2385 /* Do not slow down if app IO is already waiting for this extent */
2386 }
2387 spin_unlock_irq(&device->al_lock);
2388
2389 return throttle;
2390}
2391
2392bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002393{
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002394 struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002395 unsigned long db, dt, dbdt;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002396 unsigned int c_min_rate;
Lars Ellenberge8299872014-04-28 18:43:19 +02002397 int curr_events;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002398
2399 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002400 c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002401 rcu_read_unlock();
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002402
2403 /* feature disabled? */
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002404 if (c_min_rate == 0)
Lars Ellenberge8299872014-04-28 18:43:19 +02002405 return false;
Philipp Reisnere3555d82010-11-07 15:56:29 +01002406
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002407 curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
2408 (int)part_stat_read(&disk->part0, sectors[1]) -
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002409 atomic_read(&device->rs_sect_ev);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002410 if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002411 unsigned long rs_left;
2412 int i;
2413
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002414 device->rs_last_events = curr_events;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002415
2416 /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
2417 * approx. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002418 i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002419
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002420 if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
2421 rs_left = device->ov_left;
Lars Ellenberg2649f082010-11-05 10:05:47 +01002422 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002423 rs_left = drbd_bm_total_weight(device) - device->rs_failed;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002424
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002425 dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002426 if (!dt)
2427 dt++;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002428 db = device->rs_mark_left[i] - rs_left;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002429 dbdt = Bit2KB(db/dt);
2430
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02002431 if (dbdt > c_min_rate)
Lars Ellenberge8299872014-04-28 18:43:19 +02002432 return true;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002433 }
Lars Ellenberge8299872014-04-28 18:43:19 +02002434 return false;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002435}
2436
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002437static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002438{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002439 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002440 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002441 sector_t sector;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002442 sector_t capacity;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002443 struct drbd_peer_request *peer_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002444 struct digest_info *di = NULL;
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002445 int size, verb;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002446 unsigned int fault_type;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002447 struct p_block_req *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002448
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002449 peer_device = conn_peer_device(connection, pi->vnr);
2450 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002451 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002452 device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002453 capacity = drbd_get_capacity(device->this_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002454
2455 sector = be64_to_cpu(p->sector);
2456 size = be32_to_cpu(p->blksize);
2457
Andreas Gruenbacherc670a392011-02-21 12:41:39 +01002458 if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002459 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002460 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002461 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002462 }
2463 if (sector + (size>>9) > capacity) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002464 drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002465 (unsigned long long)sector, size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002466 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002467 }
2468
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002469 if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002470 verb = 1;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002471 switch (pi->cmd) {
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002472 case P_DATA_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002473 drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002474 break;
2475 case P_RS_DATA_REQUEST:
2476 case P_CSUM_RS_REQUEST:
2477 case P_OV_REQUEST:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002478 drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002479 break;
2480 case P_OV_REPLY:
2481 verb = 0;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002482 dec_rs_pending(device);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002483 drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002484 break;
2485 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002486 BUG();
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002487 }
2488 if (verb && __ratelimit(&drbd_ratelimit_state))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002489 drbd_err(device, "Can not satisfy peer's read request, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07002490 "no local data.\n");
Philipp Reisnerb18b37b2010-10-13 15:32:44 +02002491
Lars Ellenberga821cc42010-09-06 12:31:37 +02002492 /* drain possibly payload */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002493 return drbd_drain_block(peer_device, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002494 }
2495
2496 /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
2497 * "criss-cross" setup, that might cause write-out on some other DRBD,
2498 * which in turn might block on the other node at this very place. */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002499 peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
2500 true /* has real payload */, GFP_NOIO);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002501 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002502 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002503 return -ENOMEM;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002504 }
2505
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002506 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002507 case P_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002508 peer_req->w.cb = w_e_end_data_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002509 fault_type = DRBD_FAULT_DT_RD;
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002510 /* application IO, don't drbd_rs_begin_io */
2511 goto submit;
2512
Philipp Reisnerb411b362009-09-25 16:07:19 -07002513 case P_RS_DATA_REQUEST:
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002514 peer_req->w.cb = w_e_end_rsdata_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002515 fault_type = DRBD_FAULT_RS_RD;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002516 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002517 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002518 break;
2519
2520 case P_OV_REPLY:
2521 case P_CSUM_RS_REQUEST:
2522 fault_type = DRBD_FAULT_RS_RD;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002523 di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002524 if (!di)
2525 goto out_free_e;
2526
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002527 di->digest_size = pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002528 di->digest = (((char *)di)+sizeof(struct digest_info));
2529
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002530 peer_req->digest = di;
2531 peer_req->flags |= EE_HAS_DIGEST;
Lars Ellenbergc36c3ce2010-08-11 20:42:55 +02002532
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002533 if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002534 goto out_free_e;
2535
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002536 if (pi->cmd == P_CSUM_RS_REQUEST) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002537 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002538 peer_req->w.cb = w_e_end_csum_rs_req;
Lars Ellenberg5f9915b2010-11-09 14:15:24 +01002539 /* used in the sector offset progress display */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002540 device->bm_resync_fo = BM_SECT_TO_BIT(sector);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01002541 } else if (pi->cmd == P_OV_REPLY) {
Lars Ellenberg2649f082010-11-05 10:05:47 +01002542 /* track progress, we may need to throttle */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002543 atomic_add(size >> 9, &device->rs_sect_in);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002544 peer_req->w.cb = w_e_end_ov_reply;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002545 dec_rs_pending(device);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002546 /* drbd_rs_begin_io done when we sent this request,
2547 * but accounting still needs to be done. */
2548 goto submit_for_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002549 }
2550 break;
2551
2552 case P_OV_REQUEST:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002553 if (device->ov_start_sector == ~(sector_t)0 &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002554 peer_device->connection->agreed_pro_version >= 90) {
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002555 unsigned long now = jiffies;
2556 int i;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002557 device->ov_start_sector = sector;
2558 device->ov_position = sector;
2559 device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
2560 device->rs_total = device->ov_left;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002561 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002562 device->rs_mark_left[i] = device->ov_left;
2563 device->rs_mark_time[i] = now;
Lars Ellenbergde228bb2010-11-05 09:43:15 +01002564 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002565 drbd_info(device, "Online Verify start sector: %llu\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002566 (unsigned long long)sector);
2567 }
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002568 peer_req->w.cb = w_e_end_ov_req;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002569 fault_type = DRBD_FAULT_RS_RD;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002570 break;
2571
Philipp Reisnerb411b362009-09-25 16:07:19 -07002572 default:
Andreas Gruenbacher49ba9b12011-03-25 00:35:45 +01002573 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07002574 }
2575
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002576 /* Throttle, drbd_rs_begin_io and submit should become asynchronous
2577 * wrt the receiver, but it is not as straightforward as it may seem.
2578 * Various places in the resync start and stop logic assume resync
2579 * requests are processed in order, requeuing this on the worker thread
2580 * introduces a bunch of new code for synchronization between threads.
2581 *
2582 * Unlimited throttling before drbd_rs_begin_io may stall the resync
2583 * "forever", throttling after drbd_rs_begin_io will lock that extent
2584 * for application writes for the same time. For now, just throttle
2585 * here, where the rest of the code expects the receiver to sleep for
2586 * a while, anyways.
2587 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002588
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002589 /* Throttle before drbd_rs_begin_io, as that locks out application IO;
2590 * this defers syncer requests for some time, before letting at least
2591 * on request through. The resync controller on the receiving side
2592 * will adapt to the incoming rate accordingly.
2593 *
2594 * We cannot throttle here if remote is Primary/SyncTarget:
2595 * we would also throttle its application reads.
2596 * In that case, throttling is done on the SyncTarget only.
2597 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002598 if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
Philipp Reisnere3555d82010-11-07 15:56:29 +01002599 schedule_timeout_uninterruptible(HZ/10);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002600 if (drbd_rs_begin_io(device, sector))
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002601 goto out_free_e;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002602
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002603submit_for_resync:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002604 atomic_add(size >> 9, &device->rs_sect_ev);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002605
Lars Ellenberg80a40e42010-08-11 23:28:00 +02002606submit:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002607 inc_unacked(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002608 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002609 list_add_tail(&peer_req->w.list, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002610 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002611
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002612 if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002613 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002614
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002615 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002616 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002617 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002618 list_del(&peer_req->w.list);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002619 spin_unlock_irq(&device->resource->req_lock);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002620 /* no drbd_rs_complete_io(), we are dropping the connection anyways */
2621
Philipp Reisnerb411b362009-09-25 16:07:19 -07002622out_free_e:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002623 put_ldev(device);
2624 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002625 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002626}
2627
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002628/**
2629 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
2630 */
2631static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002632{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002633 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002634 int self, peer, rv = -100;
2635 unsigned long ch_self, ch_peer;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002636 enum drbd_after_sb_p after_sb_0p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002637
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002638 self = device->ldev->md.uuid[UI_BITMAP] & 1;
2639 peer = device->p_uuid[UI_BITMAP] & 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002640
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002641 ch_peer = device->p_uuid[UI_SIZE];
2642 ch_self = device->comm_bm_set;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002643
Philipp Reisner44ed1672011-04-19 17:10:19 +02002644 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002645 after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002646 rcu_read_unlock();
2647 switch (after_sb_0p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002648 case ASB_CONSENSUS:
2649 case ASB_DISCARD_SECONDARY:
2650 case ASB_CALL_HELPER:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002651 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002652 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002653 break;
2654 case ASB_DISCONNECT:
2655 break;
2656 case ASB_DISCARD_YOUNGER_PRI:
2657 if (self == 0 && peer == 1) {
2658 rv = -1;
2659 break;
2660 }
2661 if (self == 1 && peer == 0) {
2662 rv = 1;
2663 break;
2664 }
2665 /* Else fall through to one of the other strategies... */
2666 case ASB_DISCARD_OLDER_PRI:
2667 if (self == 0 && peer == 1) {
2668 rv = 1;
2669 break;
2670 }
2671 if (self == 1 && peer == 0) {
2672 rv = -1;
2673 break;
2674 }
2675 /* Else fall through to one of the other strategies... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002676 drbd_warn(device, "Discard younger/older primary did not find a decision\n"
Philipp Reisnerb411b362009-09-25 16:07:19 -07002677 "Using discard-least-changes instead\n");
2678 case ASB_DISCARD_ZERO_CHG:
2679 if (ch_peer == 0 && ch_self == 0) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002680 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002681 ? -1 : 1;
2682 break;
2683 } else {
2684 if (ch_peer == 0) { rv = 1; break; }
2685 if (ch_self == 0) { rv = -1; break; }
2686 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002687 if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002688 break;
2689 case ASB_DISCARD_LEAST_CHG:
2690 if (ch_self < ch_peer)
2691 rv = -1;
2692 else if (ch_self > ch_peer)
2693 rv = 1;
2694 else /* ( ch_self == ch_peer ) */
2695 /* Well, then use something else. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002696 rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002697 ? -1 : 1;
2698 break;
2699 case ASB_DISCARD_LOCAL:
2700 rv = -1;
2701 break;
2702 case ASB_DISCARD_REMOTE:
2703 rv = 1;
2704 }
2705
2706 return rv;
2707}
2708
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002709/**
2710 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2711 */
2712static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002713{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002714 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002715 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002716 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002717
Philipp Reisner44ed1672011-04-19 17:10:19 +02002718 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002719 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002720 rcu_read_unlock();
2721 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002722 case ASB_DISCARD_YOUNGER_PRI:
2723 case ASB_DISCARD_OLDER_PRI:
2724 case ASB_DISCARD_LEAST_CHG:
2725 case ASB_DISCARD_LOCAL:
2726 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002727 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002728 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002729 break;
2730 case ASB_DISCONNECT:
2731 break;
2732 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002733 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002734 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002735 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002736 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002737 rv = hg;
2738 break;
2739 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002740 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002741 break;
2742 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002743 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002744 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002745 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002746 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002747 enum drbd_state_rv rv2;
2748
Philipp Reisnerb411b362009-09-25 16:07:19 -07002749 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2750 * we might be here in C_WF_REPORT_PARAMS which is transient.
2751 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002752 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002753 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002754 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002755 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002756 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002757 rv = hg;
2758 }
2759 } else
2760 rv = hg;
2761 }
2762
2763 return rv;
2764}
2765
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002766/**
2767 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
2768 */
2769static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002770{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002771 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002772 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002773 enum drbd_after_sb_p after_sb_2p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002774
Philipp Reisner44ed1672011-04-19 17:10:19 +02002775 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002776 after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002777 rcu_read_unlock();
2778 switch (after_sb_2p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002779 case ASB_DISCARD_YOUNGER_PRI:
2780 case ASB_DISCARD_OLDER_PRI:
2781 case ASB_DISCARD_LEAST_CHG:
2782 case ASB_DISCARD_LOCAL:
2783 case ASB_DISCARD_REMOTE:
2784 case ASB_CONSENSUS:
2785 case ASB_DISCARD_SECONDARY:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002786 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002787 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002788 break;
2789 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002790 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002791 break;
2792 case ASB_DISCONNECT:
2793 break;
2794 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002795 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002796 if (hg == -1) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002797 enum drbd_state_rv rv2;
2798
Philipp Reisnerb411b362009-09-25 16:07:19 -07002799 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2800 * we might be here in C_WF_REPORT_PARAMS which is transient.
2801 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002802 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002803 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002804 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002805 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002806 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002807 rv = hg;
2808 }
2809 } else
2810 rv = hg;
2811 }
2812
2813 return rv;
2814}
2815
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002816static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002817 u64 bits, u64 flags)
2818{
2819 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002820 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002821 return;
2822 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002823 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002824 text,
2825 (unsigned long long)uuid[UI_CURRENT],
2826 (unsigned long long)uuid[UI_BITMAP],
2827 (unsigned long long)uuid[UI_HISTORY_START],
2828 (unsigned long long)uuid[UI_HISTORY_END],
2829 (unsigned long long)bits,
2830 (unsigned long long)flags);
2831}
2832
2833/*
2834 100 after split brain try auto recover
2835 2 C_SYNC_SOURCE set BitMap
2836 1 C_SYNC_SOURCE use BitMap
2837 0 no Sync
2838 -1 C_SYNC_TARGET use BitMap
2839 -2 C_SYNC_TARGET set BitMap
2840 -100 after split brain, disconnect
2841-1000 unrelated data
Philipp Reisner4a23f262011-01-11 17:42:17 +01002842-1091 requires proto 91
2843-1096 requires proto 96
Philipp Reisnerb411b362009-09-25 16:07:19 -07002844 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002845static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002846{
2847 u64 self, peer;
2848 int i, j;
2849
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002850 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
2851 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002852
2853 *rule_nr = 10;
2854 if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
2855 return 0;
2856
2857 *rule_nr = 20;
2858 if ((self == UUID_JUST_CREATED || self == (u64)0) &&
2859 peer != UUID_JUST_CREATED)
2860 return -2;
2861
2862 *rule_nr = 30;
2863 if (self != UUID_JUST_CREATED &&
2864 (peer == UUID_JUST_CREATED || peer == (u64)0))
2865 return 2;
2866
2867 if (self == peer) {
2868 int rct, dc; /* roles at crash time */
2869
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002870 if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002872 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002873 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002874
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002875 if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
2876 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002877 drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002878 drbd_uuid_move_history(device);
2879 device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
2880 device->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002881
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002882 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2883 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002884 *rule_nr = 34;
2885 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002886 drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002887 *rule_nr = 36;
2888 }
2889
2890 return 1;
2891 }
2892
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002893 if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002894
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002895 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002896 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002897
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002898 if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
2899 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002900 drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002901
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002902 device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
2903 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
2904 device->p_uuid[UI_BITMAP] = 0UL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002905
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002906 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002907 *rule_nr = 35;
2908 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002909 drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002910 *rule_nr = 37;
2911 }
2912
2913 return -1;
2914 }
2915
2916 /* Common power [off|failure] */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002917 rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
2918 (device->p_uuid[UI_FLAGS] & 2);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002919 /* lowest bit is set when we were primary,
2920 * next bit (weight 2) is set when peer was primary */
2921 *rule_nr = 40;
2922
2923 switch (rct) {
2924 case 0: /* !self_pri && !peer_pri */ return 0;
2925 case 1: /* self_pri && !peer_pri */ return 1;
2926 case 2: /* !self_pri && peer_pri */ return -1;
2927 case 3: /* self_pri && peer_pri */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002928 dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002929 return dc ? -1 : 1;
2930 }
2931 }
2932
2933 *rule_nr = 50;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002934 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002935 if (self == peer)
2936 return -1;
2937
2938 *rule_nr = 51;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002939 peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002940 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002941 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002942 (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
2943 (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
2944 peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002945 /* The last P_SYNC_UUID did not get though. Undo the last start of
2946 resync as sync source modifications of the peer's UUIDs. */
2947
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002948 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002949 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002950
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002951 device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
2952 device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];
Philipp Reisner4a23f262011-01-11 17:42:17 +01002953
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002954 drbd_info(device, "Lost last syncUUID packet, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002955 drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisner4a23f262011-01-11 17:42:17 +01002956
Philipp Reisnerb411b362009-09-25 16:07:19 -07002957 return -1;
2958 }
2959 }
2960
2961 *rule_nr = 60;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002962 self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002963 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002964 peer = device->p_uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002965 if (self == peer)
2966 return -2;
2967 }
2968
2969 *rule_nr = 70;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002970 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
2971 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002972 if (self == peer)
2973 return 1;
2974
2975 *rule_nr = 71;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002976 self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002977 if (self == peer) {
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002978 if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002979 (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
2980 (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
2981 self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002982 /* The last P_SYNC_UUID did not get though. Undo the last start of
2983 resync as sync source modifications of our UUIDs. */
2984
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002985 if (first_peer_device(device)->connection->agreed_pro_version < 91)
Philipp Reisner4a23f262011-01-11 17:42:17 +01002986 return -1091;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002987
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002988 __drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
2989 __drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002990
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002991 drbd_info(device, "Last syncUUID did not get through, corrected:\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002992 drbd_uuid_dump(device, "self", device->ldev->md.uuid,
2993 device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002994
2995 return 1;
2996 }
2997 }
2998
2999
3000 *rule_nr = 80;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003001 peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003002 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003003 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003004 if (self == peer)
3005 return 2;
3006 }
3007
3008 *rule_nr = 90;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003009 self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
3010 peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003011 if (self == peer && self != ((u64)0))
3012 return 100;
3013
3014 *rule_nr = 100;
3015 for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003016 self = device->ldev->md.uuid[i] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003017 for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003018 peer = device->p_uuid[j] & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003019 if (self == peer)
3020 return -100;
3021 }
3022 }
3023
3024 return -1000;
3025}
3026
3027/* drbd_sync_handshake() returns the new conn state on success, or
3028 CONN_MASK (-1) on failure.
3029 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003030static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
3031 enum drbd_role peer_role,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003032 enum drbd_disk_state peer_disk) __must_hold(local)
3033{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003034 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003035 enum drbd_conns rv = C_MASK;
3036 enum drbd_disk_state mydisk;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003037 struct net_conf *nc;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003038 int hg, rule_nr, rr_conflict, tentative;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003039
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003040 mydisk = device->state.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003041 if (mydisk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003042 mydisk = device->new_state_tmp.disk;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003043
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003044 drbd_info(device, "drbd_sync_handshake:\n");
Philipp Reisner9f2247b2012-08-16 14:25:58 +02003045
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003046 spin_lock_irq(&device->ldev->md.uuid_lock);
3047 drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
3048 drbd_uuid_dump(device, "peer", device->p_uuid,
3049 device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003050
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003051 hg = drbd_uuid_compare(device, &rule_nr);
3052 spin_unlock_irq(&device->ldev->md.uuid_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003053
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003054 drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003055
3056 if (hg == -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003057 drbd_alert(device, "Unrelated data, aborting!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003058 return C_MASK;
3059 }
Philipp Reisner4a23f262011-01-11 17:42:17 +01003060 if (hg < -1000) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003061 drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003062 return C_MASK;
3063 }
3064
3065 if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
3066 (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
3067 int f = (hg == -100) || abs(hg) == 2;
3068 hg = mydisk > D_INCONSISTENT ? 1 : -1;
3069 if (f)
3070 hg = hg*2;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003071 drbd_info(device, "Becoming sync %s due to disk states.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003072 hg > 0 ? "source" : "target");
3073 }
3074
Adam Gandelman3a11a482010-04-08 16:48:23 -07003075 if (abs(hg) == 100)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003076 drbd_khelper(device, "initial-split-brain");
Adam Gandelman3a11a482010-04-08 16:48:23 -07003077
Philipp Reisner44ed1672011-04-19 17:10:19 +02003078 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003079 nc = rcu_dereference(peer_device->connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02003080
3081 if (hg == 100 || (hg == -100 && nc->always_asbp)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003082 int pcount = (device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003083 + (peer_role == R_PRIMARY);
3084 int forced = (hg == -100);
3085
3086 switch (pcount) {
3087 case 0:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003088 hg = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003089 break;
3090 case 1:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003091 hg = drbd_asb_recover_1p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003092 break;
3093 case 2:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003094 hg = drbd_asb_recover_2p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003095 break;
3096 }
3097 if (abs(hg) < 100) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003098 drbd_warn(device, "Split-Brain detected, %d primaries, "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003099 "automatically solved. Sync from %s node\n",
3100 pcount, (hg < 0) ? "peer" : "this");
3101 if (forced) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003102 drbd_warn(device, "Doing a full sync, since"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003103 " UUIDs where ambiguous.\n");
3104 hg = hg*2;
3105 }
3106 }
3107 }
3108
3109 if (hg == -100) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003110 if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003111 hg = -1;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003112 if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003113 hg = 1;
3114
3115 if (abs(hg) < 100)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003116 drbd_warn(device, "Split-Brain detected, manually solved. "
Philipp Reisnerb411b362009-09-25 16:07:19 -07003117 "Sync from %s node\n",
3118 (hg < 0) ? "peer" : "this");
3119 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02003120 rr_conflict = nc->rr_conflict;
Andreas Gruenbacher6dff2902011-06-28 14:18:12 +02003121 tentative = nc->tentative;
Philipp Reisner44ed1672011-04-19 17:10:19 +02003122 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003123
3124 if (hg == -100) {
Lars Ellenberg580b9762010-02-26 23:15:23 +01003125 /* FIXME this log message is not correct if we end up here
3126 * after an attempted attach on a diskless node.
3127 * We just refuse to attach -- well, we drop the "connection"
3128 * to that disk, in a way... */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003129 drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003130 drbd_khelper(device, "split-brain");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003131 return C_MASK;
3132 }
3133
3134 if (hg > 0 && mydisk <= D_INCONSISTENT) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003135 drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003136 return C_MASK;
3137 }
3138
3139 if (hg < 0 && /* by intention we do not use mydisk here. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003140 device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02003141 switch (rr_conflict) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003142 case ASB_CALL_HELPER:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003143 drbd_khelper(device, "pri-lost");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144 /* fall through */
3145 case ASB_DISCONNECT:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003146 drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147 return C_MASK;
3148 case ASB_VIOLENTLY:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003149 drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
Philipp Reisnerb411b362009-09-25 16:07:19 -07003150 "assumption\n");
3151 }
3152 }
3153
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003154 if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003155 if (hg == 0)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003156 drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003157 else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003158 drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003159 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
3160 abs(hg) >= 2 ? "full" : "bit-map based");
3161 return C_MASK;
3162 }
3163
Philipp Reisnerb411b362009-09-25 16:07:19 -07003164 if (abs(hg) >= 2) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003165 drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003166 if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003167 BM_LOCKED_SET_ALLOWED))
Philipp Reisnerb411b362009-09-25 16:07:19 -07003168 return C_MASK;
3169 }
3170
3171 if (hg > 0) { /* become sync source. */
3172 rv = C_WF_BITMAP_S;
3173 } else if (hg < 0) { /* become sync target */
3174 rv = C_WF_BITMAP_T;
3175 } else {
3176 rv = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003177 if (drbd_bm_total_weight(device)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003178 drbd_info(device, "No resync, but %lu bits in bitmap!\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003179 drbd_bm_total_weight(device));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003180 }
3181 }
3182
3183 return rv;
3184}
3185
Philipp Reisnerf179d762011-05-16 17:31:47 +02003186static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003187{
3188 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003189 if (peer == ASB_DISCARD_REMOTE)
3190 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003191
3192 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003193 if (peer == ASB_DISCARD_LOCAL)
3194 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003195
3196 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003197 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003198}
3199
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003200static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003201{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003202 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003203 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3204 int p_proto, p_discard_my_data, p_two_primaries, cf;
3205 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3206 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003207 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003208 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210 p_proto = be32_to_cpu(p->protocol);
3211 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3212 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3213 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003214 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003215 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003216 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003217
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003218 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003219 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003220
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003221 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003222 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003223 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003224 if (err)
3225 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003226 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003227 }
3228
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003229 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003230 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003231
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003232 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003233 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003234
3235 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003236 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003237
3238 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003239 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003240 goto disconnect_rcu_unlock;
3241 }
3242
3243 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003244 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003245 goto disconnect_rcu_unlock;
3246 }
3247
3248 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003249 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003250 goto disconnect_rcu_unlock;
3251 }
3252
3253 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003254 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003255 goto disconnect_rcu_unlock;
3256 }
3257
3258 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003259 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003260 goto disconnect_rcu_unlock;
3261 }
3262
3263 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003264 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003265 goto disconnect_rcu_unlock;
3266 }
3267
3268 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003269 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003270 goto disconnect_rcu_unlock;
3271 }
3272
3273 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003274 }
3275
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003276 if (integrity_alg[0]) {
3277 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003278
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003279 /*
3280 * We can only change the peer data integrity algorithm
3281 * here. Changing our own data integrity algorithm
3282 * requires that we send a P_PROTOCOL_UPDATE packet at
3283 * the same time; otherwise, the peer has no way to
3284 * tell between which packets the algorithm should
3285 * change.
3286 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003287
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003288 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3289 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003290 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003291 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003292 goto disconnect;
3293 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003294
3295 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3296 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3297 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3298 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003299 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003300 goto disconnect;
3301 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003302 }
3303
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003304 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3305 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003306 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003307 goto disconnect;
3308 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003309
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003310 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003311 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003312 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003313 *new_net_conf = *old_net_conf;
3314
3315 new_net_conf->wire_protocol = p_proto;
3316 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3317 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3318 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3319 new_net_conf->two_primaries = p_two_primaries;
3320
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003321 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003322 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003323 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003324
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003325 crypto_free_hash(connection->peer_integrity_tfm);
3326 kfree(connection->int_dig_in);
3327 kfree(connection->int_dig_vv);
3328 connection->peer_integrity_tfm = peer_integrity_tfm;
3329 connection->int_dig_in = int_dig_in;
3330 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003331
3332 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003333 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003334 integrity_alg[0] ? integrity_alg : "(none)");
3335
3336 synchronize_rcu();
3337 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003338 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003339
Philipp Reisner44ed1672011-04-19 17:10:19 +02003340disconnect_rcu_unlock:
3341 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003342disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003343 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003344 kfree(int_dig_in);
3345 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003346 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003347 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003348}
3349
3350/* helper function
3351 * input: alg name, feature name
3352 * return: NULL (alg name was "")
3353 * ERR_PTR(error) if something goes wrong
3354 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303355static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003356struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003357 const char *alg, const char *name)
3358{
3359 struct crypto_hash *tfm;
3360
3361 if (!alg[0])
3362 return NULL;
3363
3364 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3365 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003366 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003367 alg, name, PTR_ERR(tfm));
3368 return tfm;
3369 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003370 return tfm;
3371}
3372
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003373static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003374{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003375 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003376 int size = pi->size;
3377
3378 while (size) {
3379 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003380 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003381 if (s <= 0) {
3382 if (s < 0)
3383 return s;
3384 break;
3385 }
3386 size -= s;
3387 }
3388 if (size)
3389 return -EIO;
3390 return 0;
3391}
3392
3393/*
3394 * config_unknown_volume - device configuration command for unknown volume
3395 *
3396 * When a device is added to an existing connection, the node on which the
3397 * device is added first will send configuration commands to its peer but the
3398 * peer will not know about the device yet. It will warn and ignore these
3399 * commands. Once the device is added on the second node, the second node will
3400 * send the same device configuration commands, but in the other direction.
3401 *
3402 * (We can also end up here if drbd is misconfigured.)
3403 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003404static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003405{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003406 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003407 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003408 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003409}
3410
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003411static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003412{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003413 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003414 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003415 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003416 unsigned int header_size, data_size, exp_max_sz;
3417 struct crypto_hash *verify_tfm = NULL;
3418 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003419 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003420 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003421 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003422 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003423 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003424 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003425
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003426 peer_device = conn_peer_device(connection, pi->vnr);
3427 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003428 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003429 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003430
3431 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3432 : apv == 88 ? sizeof(struct p_rs_param)
3433 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003434 : apv <= 94 ? sizeof(struct p_rs_param_89)
3435 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003436
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003437 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003438 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003439 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003440 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003441 }
3442
3443 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003444 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003445 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003446 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003447 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003448 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003449 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003450 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003451 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003452 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003453 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003454 }
3455
3456 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003457 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003458 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3459
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003460 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003461 if (err)
3462 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003463
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003464 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003465 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003466 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003467 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3468 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003469 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003470 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003471 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003472 return -ENOMEM;
3473 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003474
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003475 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003476 *new_disk_conf = *old_disk_conf;
3477
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003478 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003479 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003480
3481 if (apv >= 88) {
3482 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003483 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003484 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003485 "peer wants %u, accepting only up to %u byte\n",
3486 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003487 err = -EIO;
3488 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003489 }
3490
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003491 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003492 if (err)
3493 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003494 /* we expect NUL terminated string */
3495 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003496 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003497 p->verify_alg[data_size-1] = 0;
3498
3499 } else /* apv >= 89 */ {
3500 /* we still expect NUL terminated strings */
3501 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003502 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3503 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003504 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3505 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3506 }
3507
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003508 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003509 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003510 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003511 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003512 goto disconnect;
3513 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003514 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003515 p->verify_alg, "verify-alg");
3516 if (IS_ERR(verify_tfm)) {
3517 verify_tfm = NULL;
3518 goto disconnect;
3519 }
3520 }
3521
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003522 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003523 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003524 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003525 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003526 goto disconnect;
3527 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003528 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529 p->csums_alg, "csums-alg");
3530 if (IS_ERR(csums_tfm)) {
3531 csums_tfm = NULL;
3532 goto disconnect;
3533 }
3534 }
3535
Philipp Reisner813472c2011-05-03 16:47:02 +02003536 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003537 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3538 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3539 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3540 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003541
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003542 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003543 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003544 new_plan = fifo_alloc(fifo_size);
3545 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003546 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003547 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003548 goto disconnect;
3549 }
3550 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003551 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003552
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003553 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003554 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3555 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003556 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003557 goto disconnect;
3558 }
3559
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003560 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003561
3562 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003563 strcpy(new_net_conf->verify_alg, p->verify_alg);
3564 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003565 crypto_free_hash(peer_device->connection->verify_tfm);
3566 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003567 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003568 }
3569 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003570 strcpy(new_net_conf->csums_alg, p->csums_alg);
3571 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003572 crypto_free_hash(peer_device->connection->csums_tfm);
3573 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003574 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003575 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003576 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003577 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003578 }
3579
Philipp Reisner813472c2011-05-03 16:47:02 +02003580 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003581 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3582 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003583 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003584
3585 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003586 old_plan = device->rs_plan_s;
3587 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003588 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003589
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003590 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003591 synchronize_rcu();
3592 if (new_net_conf)
3593 kfree(old_net_conf);
3594 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003595 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003596
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003597 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003598
Philipp Reisner813472c2011-05-03 16:47:02 +02003599reconnect:
3600 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003601 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003602 kfree(new_disk_conf);
3603 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003604 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003605 return -EIO;
3606
Philipp Reisnerb411b362009-09-25 16:07:19 -07003607disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003608 kfree(new_plan);
3609 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003610 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003611 kfree(new_disk_conf);
3612 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003613 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003614 /* just for completeness: actually not needed,
3615 * as this is not reached if csums_tfm was ok. */
3616 crypto_free_hash(csums_tfm);
3617 /* but free the verify_tfm again, if csums_tfm did not work out */
3618 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003619 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003620 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003621}
3622
Philipp Reisnerb411b362009-09-25 16:07:19 -07003623/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003624static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003625 const char *s, sector_t a, sector_t b)
3626{
3627 sector_t d;
3628 if (a == 0 || b == 0)
3629 return;
3630 d = (a > b) ? (a - b) : (b - a);
3631 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003632 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633 (unsigned long long)a, (unsigned long long)b);
3634}
3635
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003636static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003637{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003638 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003639 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003640 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003641 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003642 sector_t p_size, p_usize, my_usize;
3643 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003644 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003645
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003646 peer_device = conn_peer_device(connection, pi->vnr);
3647 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003648 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003649 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003650
Philipp Reisnerb411b362009-09-25 16:07:19 -07003651 p_size = be64_to_cpu(p->d_size);
3652 p_usize = be64_to_cpu(p->u_size);
3653
Philipp Reisnerb411b362009-09-25 16:07:19 -07003654 /* just store the peer's disk size for now.
3655 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003656 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003657
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003658 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003659 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003660 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003661 rcu_read_unlock();
3662
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003663 warn_if_differ_considerably(device, "lower level device sizes",
3664 p_size, drbd_get_max_capacity(device->ldev));
3665 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003666 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003667
3668 /* if this is the first connect, or an otherwise expected
3669 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003670 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003671 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003672
3673 /* Never shrink a device with usable data during connect.
3674 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003675 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3676 drbd_get_capacity(device->this_bdev) &&
3677 device->state.disk >= D_OUTDATED &&
3678 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003679 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003680 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003681 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003682 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003683 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003684
3685 if (my_usize != p_usize) {
3686 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3687
3688 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3689 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003690 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003691 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003692 return -ENOMEM;
3693 }
3694
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003695 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003696 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003697 *new_disk_conf = *old_disk_conf;
3698 new_disk_conf->disk_size = p_usize;
3699
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003700 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003701 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003702 synchronize_rcu();
3703 kfree(old_disk_conf);
3704
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003705 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003706 (unsigned long)my_usize);
3707 }
3708
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003709 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003711
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003712 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3713 drbd_reconsider_max_bio_size(device);
3714 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3715 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3716 drbd_reconsider_max_bio_size(), we can be sure that after
3717 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3718
Philipp Reisnere89b5912010-03-24 17:11:33 +01003719 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003720 if (get_ldev(device)) {
3721 dd = drbd_determine_dev_size(device, ddsf, NULL);
3722 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003723 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003724 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003725 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003726 } else {
3727 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003728 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003729 }
3730
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003731 if (get_ldev(device)) {
3732 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3733 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003734 ldsc = 1;
3735 }
3736
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003737 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003738 }
3739
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003740 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003741 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003742 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003743 /* we have different sizes, probably peer
3744 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003745 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003746 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003747 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3748 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3749 if (device->state.pdsk >= D_INCONSISTENT &&
3750 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003751 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003752 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003753 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003754 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003755 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003756 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003757 }
3758 }
3759
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003760 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003761}
3762
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003763static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003764{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003765 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003766 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003767 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003768 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003769 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003770
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003771 peer_device = conn_peer_device(connection, pi->vnr);
3772 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003773 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003774 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003775
Philipp Reisnerb411b362009-09-25 16:07:19 -07003776 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003777 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003778 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003779 return false;
3780 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003781
3782 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3783 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3784
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003785 kfree(device->p_uuid);
3786 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003787
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003788 if (device->state.conn < C_CONNECTED &&
3789 device->state.disk < D_INCONSISTENT &&
3790 device->state.role == R_PRIMARY &&
3791 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003792 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003793 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003794 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003795 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003796 }
3797
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003798 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003799 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003800 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003801 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003802 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003803 (p_uuid[UI_FLAGS] & 8);
3804 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003805 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003806 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003807 "clear_n_write from receive_uuids",
3808 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003809 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3810 _drbd_uuid_set(device, UI_BITMAP, 0);
3811 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003812 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003813 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003814 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003815 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003816 put_ldev(device);
3817 } else if (device->state.disk < D_INCONSISTENT &&
3818 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003819 /* I am a diskless primary, the peer just created a new current UUID
3820 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003821 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003822 }
3823
3824 /* Before we test for the disk state, we should wait until an eventually
3825 ongoing cluster wide state change is finished. That is important if
3826 we are primary and are detaching from our disk. We need to see the
3827 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003828 mutex_lock(device->state_mutex);
3829 mutex_unlock(device->state_mutex);
3830 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3831 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003832
3833 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003834 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003835
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003836 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003837}
3838
3839/**
3840 * convert_state() - Converts the peer's view of the cluster state to our point of view
3841 * @ps: The state as seen by the peer.
3842 */
3843static union drbd_state convert_state(union drbd_state ps)
3844{
3845 union drbd_state ms;
3846
3847 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003848 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849 [C_CONNECTED] = C_CONNECTED,
3850
3851 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3852 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3853 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3854 [C_VERIFY_S] = C_VERIFY_T,
3855 [C_MASK] = C_MASK,
3856 };
3857
3858 ms.i = ps.i;
3859
3860 ms.conn = c_tab[ps.conn];
3861 ms.peer = ps.role;
3862 ms.role = ps.peer;
3863 ms.pdsk = ps.disk;
3864 ms.disk = ps.pdsk;
3865 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3866
3867 return ms;
3868}
3869
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003870static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003871{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003872 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003873 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003874 struct p_req_state *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003875 union drbd_state mask, val;
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01003876 enum drbd_state_rv rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003877
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003878 peer_device = conn_peer_device(connection, pi->vnr);
3879 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003880 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003881 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003882
Philipp Reisnerb411b362009-09-25 16:07:19 -07003883 mask.i = be32_to_cpu(p->mask);
3884 val.i = be32_to_cpu(p->val);
3885
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003886 if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003887 mutex_is_locked(device->state_mutex)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003888 drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003889 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003890 }
3891
3892 mask = convert_state(mask);
3893 val = convert_state(val);
3894
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003895 rv = drbd_change_state(device, CS_VERBOSE, mask, val);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003896 drbd_send_sr_reply(peer_device, rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003897
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003898 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003899
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003900 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003901}
3902
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003903static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003904{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003905 struct p_req_state *p = pi->data;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003906 union drbd_state mask, val;
3907 enum drbd_state_rv rv;
3908
3909 mask.i = be32_to_cpu(p->mask);
3910 val.i = be32_to_cpu(p->val);
3911
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003912 if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
3913 mutex_is_locked(&connection->cstate_mutex)) {
3914 conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003915 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003916 }
3917
3918 mask = convert_state(mask);
3919 val = convert_state(val);
3920
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003921 rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
3922 conn_send_sr_reply(connection, rv);
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003923
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003924 return 0;
Philipp Reisnerdfafcc82011-03-16 10:55:07 +01003925}
3926
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003927static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003928{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003929 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003930 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003931 struct p_state *p = pi->data;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02003932 union drbd_state os, ns, peer_state;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003933 enum drbd_disk_state real_peer_disk;
Philipp Reisner65d922c2010-06-16 16:18:09 +02003934 enum chg_state_flags cs_flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003935 int rv;
3936
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003937 peer_device = conn_peer_device(connection, pi->vnr);
3938 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003939 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003940 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003941
Philipp Reisnerb411b362009-09-25 16:07:19 -07003942 peer_state.i = be32_to_cpu(p->state);
3943
3944 real_peer_disk = peer_state.disk;
3945 if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003946 real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003947 drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07003948 }
3949
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003950 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003951 retry:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003952 os = ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003953 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003954
Lars Ellenberg545752d2011-12-05 14:39:25 +01003955 /* If some other part of the code (asender thread, timeout)
3956 * already decided to close the connection again,
3957 * we must not "re-establish" it here. */
3958 if (os.conn <= C_TEAR_DOWN)
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003959 return -ECONNRESET;
Lars Ellenberg545752d2011-12-05 14:39:25 +01003960
Lars Ellenberg40424e42011-09-26 15:24:56 +02003961 /* If this is the "end of sync" confirmation, usually the peer disk
3962 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
3963 * set) resync started in PausedSyncT, or if the timing of pause-/
3964 * unpause-sync events has been "just right", the peer disk may
3965 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
3966 */
3967 if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
3968 real_peer_disk == D_UP_TO_DATE &&
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003969 os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
3970 /* If we are (becoming) SyncSource, but peer is still in sync
3971 * preparation, ignore its uptodate-ness to avoid flapping, it
3972 * will change to inconsistent once the peer reaches active
3973 * syncing states.
3974 * It may have changed syncer-paused flags, however, so we
3975 * cannot ignore this completely. */
3976 if (peer_state.conn > C_CONNECTED &&
3977 peer_state.conn < C_SYNC_SOURCE)
3978 real_peer_disk = D_INCONSISTENT;
3979
3980 /* if peer_state changes to connected at the same time,
3981 * it explicitly notifies us that it finished resync.
3982 * Maybe we should finish it up, too? */
3983 else if (os.conn >= C_SYNC_SOURCE &&
3984 peer_state.conn == C_CONNECTED) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003985 if (drbd_bm_total_weight(device) <= device->rs_failed)
3986 drbd_resync_finished(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003987 return 0;
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003988 }
3989 }
3990
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003991 /* explicit verify finished notification, stop sector reached. */
3992 if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
3993 peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003994 ov_out_of_sync_print(device);
3995 drbd_resync_finished(device);
Lars Ellenberg58ffa582012-07-26 14:09:49 +02003996 return 0;
Lars Ellenberg02b91b52012-06-28 18:26:52 +02003997 }
3998
Lars Ellenberge9ef7bb2010-10-07 15:55:39 +02003999 /* peer says his disk is inconsistent, while we think it is uptodate,
4000 * and this happens while the peer still thinks we have a sync going on,
4001 * but we think we are already done with the sync.
4002 * We ignore this to avoid flapping pdsk.
4003 * This should not happen, if the peer is a recent version of drbd. */
4004 if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
4005 os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
4006 real_peer_disk = D_UP_TO_DATE;
4007
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004008 if (ns.conn == C_WF_REPORT_PARAMS)
4009 ns.conn = C_CONNECTED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004010
Philipp Reisner67531712010-10-27 12:21:30 +02004011 if (peer_state.conn == C_AHEAD)
4012 ns.conn = C_BEHIND;
4013
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004014 if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
4015 get_ldev_if_state(device, D_NEGOTIATING)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004016 int cr; /* consider resync */
4017
4018 /* if we established a new connection */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004019 cr = (os.conn < C_CONNECTED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004020 /* if we had an established connection
4021 * and one of the nodes newly attaches a disk */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004022 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004023 (peer_state.disk == D_NEGOTIATING ||
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004024 os.disk == D_NEGOTIATING));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004025 /* if we have both been inconsistent, and the peer has been
4026 * forced to be UpToDate with --overwrite-data */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004027 cr |= test_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004028 /* if we had been plain connected, and the admin requested to
4029 * start a sync by "invalidate" or "invalidate-remote" */
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004030 cr |= (os.conn == C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004031 (peer_state.conn >= C_STARTING_SYNC_S &&
4032 peer_state.conn <= C_WF_BITMAP_T));
4033
4034 if (cr)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004035 ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004036
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004037 put_ldev(device);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004038 if (ns.conn == C_MASK) {
4039 ns.conn = C_CONNECTED;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004040 if (device->state.disk == D_NEGOTIATING) {
4041 drbd_force_state(device, NS(disk, D_FAILED));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004042 } else if (peer_state.disk == D_NEGOTIATING) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004043 drbd_err(device, "Disk attach process on the peer node was aborted.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004044 peer_state.disk = D_DISKLESS;
Lars Ellenberg580b9762010-02-26 23:15:23 +01004045 real_peer_disk = D_DISKLESS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004046 } else {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004047 if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004048 return -EIO;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004049 D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004050 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004051 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004052 }
4053 }
4054 }
4055
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004056 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004057 if (os.i != drbd_read_state(device).i)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004058 goto retry;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004059 clear_bit(CONSIDER_RESYNC, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004060 ns.peer = peer_state.role;
4061 ns.pdsk = real_peer_disk;
4062 ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004063 if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004064 ns.disk = device->new_state_tmp.disk;
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004065 cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004066 if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
4067 test_bit(NEW_CUR_UUID, &device->flags)) {
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01004068 /* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
Philipp Reisner481c6f52010-06-22 14:03:27 +02004069 for temporal network outages! */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004070 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004071 drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004072 tl_clear(peer_device->connection);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004073 drbd_uuid_new_current(device);
4074 clear_bit(NEW_CUR_UUID, &device->flags);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004075 conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004076 return -EIO;
Philipp Reisner481c6f52010-06-22 14:03:27 +02004077 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004078 rv = _drbd_set_state(device, ns, cs_flags, NULL);
4079 ns = drbd_read_state(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004080 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081
4082 if (rv < SS_SUCCESS) {
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004083 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004084 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004085 }
4086
Lars Ellenberg4ac4aad2010-07-22 17:39:26 +02004087 if (os.conn > C_WF_REPORT_PARAMS) {
4088 if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089 peer_state.disk != D_NEGOTIATING ) {
4090 /* we want resync, peer has not yet decided to sync... */
4091 /* Nowadays only used when forcing a node into primary role and
4092 setting its disk to UpToDate with that */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004093 drbd_send_uuids(peer_device);
4094 drbd_send_current_state(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004095 }
4096 }
4097
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004098 clear_bit(DISCARD_MY_DATA, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004099
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004100 drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004101
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004102 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004103}
4104
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004105static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004106{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004107 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004108 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004109 struct p_rs_uuid *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004110
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004111 peer_device = conn_peer_device(connection, pi->vnr);
4112 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004113 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004114 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004115
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004116 wait_event(device->misc_wait,
4117 device->state.conn == C_WF_SYNC_UUID ||
4118 device->state.conn == C_BEHIND ||
4119 device->state.conn < C_CONNECTED ||
4120 device->state.disk < D_NEGOTIATING);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004121
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004122 /* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004123
Philipp Reisnerb411b362009-09-25 16:07:19 -07004124 /* Here the _drbd_uuid_ functions are right, current should
4125 _not_ be rotated into the history */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004126 if (get_ldev_if_state(device, D_NEGOTIATING)) {
4127 _drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
4128 _drbd_uuid_set(device, UI_BITMAP, 0UL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004129
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004130 drbd_print_uuids(device, "updated sync uuid");
4131 drbd_start_resync(device, C_SYNC_TARGET);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004132
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004133 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004134 } else
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004135 drbd_err(device, "Ignoring SyncUUID packet!\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004136
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004137 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004138}
4139
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004140/**
4141 * receive_bitmap_plain
4142 *
4143 * Return 0 when done, 1 when another iteration is needed, and a negative error
4144 * code upon failure.
4145 */
4146static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004147receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004148 unsigned long *p, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004149{
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004150 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004151 drbd_header_size(peer_device->connection);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004152 unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004153 c->bm_words - c->word_offset);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004154 unsigned int want = num_words * sizeof(*p);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004155 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004156
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004157 if (want != size) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004158 drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004159 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160 }
4161 if (want == 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004162 return 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004163 err = drbd_recv_all(peer_device->connection, p, want);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004164 if (err)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004165 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004167 drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004168
4169 c->word_offset += num_words;
4170 c->bit_offset = c->word_offset * BITS_PER_LONG;
4171 if (c->bit_offset > c->bm_bits)
4172 c->bit_offset = c->bm_bits;
4173
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004174 return 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004175}
4176
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004177static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4178{
4179 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4180}
4181
4182static int dcbp_get_start(struct p_compressed_bm *p)
4183{
4184 return (p->encoding & 0x80) != 0;
4185}
4186
4187static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4188{
4189 return (p->encoding >> 4) & 0x7;
4190}
4191
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004192/**
4193 * recv_bm_rle_bits
4194 *
4195 * Return 0 when done, 1 when another iteration is needed, and a negative error
4196 * code upon failure.
4197 */
4198static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004199recv_bm_rle_bits(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004200 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004201 struct bm_xfer_ctx *c,
4202 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004203{
4204 struct bitstream bs;
4205 u64 look_ahead;
4206 u64 rl;
4207 u64 tmp;
4208 unsigned long s = c->bit_offset;
4209 unsigned long e;
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004210 int toggle = dcbp_get_start(p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004211 int have;
4212 int bits;
4213
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004214 bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004215
4216 bits = bitstream_get_bits(&bs, &look_ahead, 64);
4217 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004218 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004219
4220 for (have = bits; have > 0; s += rl, toggle = !toggle) {
4221 bits = vli_decode_bits(&rl, look_ahead);
4222 if (bits <= 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004223 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004224
4225 if (toggle) {
4226 e = s + rl -1;
4227 if (e >= c->bm_bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004228 drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004229 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004230 }
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004231 _drbd_bm_set_bits(peer_device->device, s, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004232 }
4233
4234 if (have < bits) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004235 drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004236 have, bits, look_ahead,
4237 (unsigned int)(bs.cur.b - p->code),
4238 (unsigned int)bs.buf_len);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004239 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004240 }
Lars Ellenbergd2da5b02013-10-23 10:59:18 +02004241 /* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
4242 if (likely(bits < 64))
4243 look_ahead >>= bits;
4244 else
4245 look_ahead = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004246 have -= bits;
4247
4248 bits = bitstream_get_bits(&bs, &tmp, 64 - have);
4249 if (bits < 0)
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004250 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004251 look_ahead |= tmp << have;
4252 have += bits;
4253 }
4254
4255 c->bit_offset = s;
4256 bm_xfer_ctx_bit_to_word_offset(c);
4257
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004258 return (s != c->bm_bits);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004259}
4260
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004261/**
4262 * decode_bitmap_c
4263 *
4264 * Return 0 when done, 1 when another iteration is needed, and a negative error
4265 * code upon failure.
4266 */
4267static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004268decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004269 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004270 struct bm_xfer_ctx *c,
4271 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004272{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004273 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004274 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004275
4276 /* other variants had been implemented for evaluation,
4277 * but have been dropped as this one turned out to be "best"
4278 * during all our tests. */
4279
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004280 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4281 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004282 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004283}
4284
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004285void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004286 const char *direction, struct bm_xfer_ctx *c)
4287{
4288 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004289 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004290 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4291 unsigned int plain =
4292 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4293 c->bm_words * sizeof(unsigned long);
4294 unsigned int total = c->bytes[0] + c->bytes[1];
4295 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004296
4297 /* total can not be zero. but just in case: */
4298 if (total == 0)
4299 return;
4300
4301 /* don't report if not compressed */
4302 if (total >= plain)
4303 return;
4304
4305 /* total < plain. check for overflow, still */
4306 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4307 : (1000 * total / plain);
4308
4309 if (r > 1000)
4310 r = 1000;
4311
4312 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004313 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004314 "total %u; compression: %u.%u%%\n",
4315 direction,
4316 c->bytes[1], c->packets[1],
4317 c->bytes[0], c->packets[0],
4318 total, r/10, r % 10);
4319}
4320
4321/* Since we are processing the bitfield from lower addresses to higher,
4322 it does not matter if the process it in 32 bit chunks or 64 bit
4323 chunks as long as it is little endian. (Understand it as byte stream,
4324 beginning with the lowest byte...) If we would use big endian
4325 we would need to process it from the highest address to the lowest,
4326 in order to be agnostic to the 32 vs 64 bits issue.
4327
4328 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004329static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004330{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004331 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004332 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004333 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004334 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004335
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004336 peer_device = conn_peer_device(connection, pi->vnr);
4337 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004338 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004339 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004340
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004341 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004342 /* you are supposed to send additional out-of-sync information
4343 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344
Philipp Reisnerb411b362009-09-25 16:07:19 -07004345 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004346 .bm_bits = drbd_bm_bits(device),
4347 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004348 };
4349
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004350 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004351 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004352 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004353 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004354 /* MAYBE: sanity check that we speak proto >= 90,
4355 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004356 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004357
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004358 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004359 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004360 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004361 goto out;
4362 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004363 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004364 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004365 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004366 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004367 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004368 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004369 if (err)
4370 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004371 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004372 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004373 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004374 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004375 goto out;
4376 }
4377
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004378 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004379 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004380
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004381 if (err <= 0) {
4382 if (err < 0)
4383 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004384 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004385 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004386 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004387 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004388 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004389 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004390
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004391 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004392
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004393 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004394 enum drbd_state_rv rv;
4395
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004396 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004397 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004398 goto out;
4399 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004400 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004401 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004402 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004403 /* admin may have requested C_DISCONNECTING,
4404 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004405 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004406 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004407 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004408 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409
Philipp Reisnerb411b362009-09-25 16:07:19 -07004410 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004411 drbd_bm_unlock(device);
4412 if (!err && device->state.conn == C_WF_BITMAP_S)
4413 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004414 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004415}
4416
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004417static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004418{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004419 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004420 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004421
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004422 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004423}
4424
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004425static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004426{
Philipp Reisnerb411b362009-09-25 16:07:19 -07004427 /* Make sure we've acked all the TCP data associated
4428 * with the data requests being unplugged */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004429 drbd_tcp_quickack(connection->data.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004430
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004431 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004432}
4433
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004434static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004435{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004436 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004437 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004438 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004439
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004440 peer_device = conn_peer_device(connection, pi->vnr);
4441 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004442 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004443 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004444
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004445 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004446 case C_WF_SYNC_UUID:
4447 case C_WF_BITMAP_T:
4448 case C_BEHIND:
4449 break;
4450 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004451 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004452 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004453 }
4454
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004455 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004456
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004457 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004458}
4459
Philipp Reisner02918be2010-08-20 14:35:10 +02004460struct data_cmd {
4461 int expect_payload;
4462 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004463 int (*fn)(struct drbd_connection *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004464};
4465
Philipp Reisner02918be2010-08-20 14:35:10 +02004466static struct data_cmd drbd_cmd_handler[] = {
4467 [P_DATA] = { 1, sizeof(struct p_data), receive_Data },
4468 [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply },
4469 [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } ,
4470 [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } ,
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004471 [P_BITMAP] = { 1, 0, receive_bitmap } ,
4472 [P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
4473 [P_UNPLUG_REMOTE] = { 0, 0, receive_UnplugRemote },
Philipp Reisner02918be2010-08-20 14:35:10 +02004474 [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4475 [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004476 [P_SYNC_PARAM] = { 1, 0, receive_SyncParam },
4477 [P_SYNC_PARAM89] = { 1, 0, receive_SyncParam },
Philipp Reisner02918be2010-08-20 14:35:10 +02004478 [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol },
4479 [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids },
4480 [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes },
4481 [P_STATE] = { 0, sizeof(struct p_state), receive_state },
4482 [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state },
4483 [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
4484 [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
4485 [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4486 [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
4487 [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip },
Philipp Reisner73a01a12010-10-27 14:33:00 +02004488 [P_OUT_OF_SYNC] = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004489 [P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
Philipp Reisner036b17e2011-05-16 17:38:11 +02004490 [P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02004491 [P_TRIM] = { 0, sizeof(struct p_trim), receive_Data },
Philipp Reisner02918be2010-08-20 14:35:10 +02004492};
4493
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004494static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004495{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004496 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004497 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004498 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004499
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004500 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004501 struct data_cmd *cmd;
4502
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004503 drbd_thread_current_set_cpu(&connection->receiver);
4504 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004505 goto err_out;
4506
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004507 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004508 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004509 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004510 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004511 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004512 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004513
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004514 shs = cmd->pkt_size;
4515 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004516 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004517 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004518 goto err_out;
4519 }
4520
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004521 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004522 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004523 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004524 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004525 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004526 }
4527
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004528 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004529 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004530 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004531 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004532 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004534 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004535 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004536
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004537 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004538 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004539}
4540
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004541static void conn_disconnect(struct drbd_connection *connection)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004542{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004543 struct drbd_peer_device *peer_device;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004544 enum drbd_conns oc;
Philipp Reisner376694a2011-11-07 10:54:28 +01004545 int vnr;
Philipp Reisnerf70b35112010-06-24 14:34:40 +02004546
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004547 if (connection->cstate == C_STANDALONE)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004548 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004549
Lars Ellenberg545752d2011-12-05 14:39:25 +01004550 /* We are about to start the cleanup after connection loss.
4551 * Make sure drbd_make_request knows about that.
4552 * Usually we should be in some network failure state already,
4553 * but just in case we are not, we fix it up here.
4554 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004555 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
Lars Ellenberg545752d2011-12-05 14:39:25 +01004556
Philipp Reisnerb411b362009-09-25 16:07:19 -07004557 /* asender does not clean up anything. it must not interfere, either */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004558 drbd_thread_stop(&connection->asender);
4559 drbd_free_sock(connection);
Philipp Reisner360cc742011-02-08 14:29:53 +01004560
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004561 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004562 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
4563 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004564 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004565 rcu_read_unlock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004566 drbd_disconnected(peer_device);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02004567 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02004568 rcu_read_lock();
4569 }
4570 rcu_read_unlock();
4571
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004572 if (!list_empty(&connection->current_epoch->list))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004573 drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
Philipp Reisner12038a32011-11-09 19:18:00 +01004574 /* ok, no more ee's on the fly, it is safe to reset the epoch_size */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004575 atomic_set(&connection->current_epoch->epoch_size, 0);
4576 connection->send.seen_any_write_yet = false;
Philipp Reisner12038a32011-11-09 19:18:00 +01004577
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004578 drbd_info(connection, "Connection closed\n");
Philipp Reisner360cc742011-02-08 14:29:53 +01004579
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004580 if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
4581 conn_try_outdate_peer_async(connection);
Philipp Reisnercb703452011-03-24 11:03:07 +01004582
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004583 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004584 oc = connection->cstate;
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004585 if (oc >= C_UNCONNECTED)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004586 _conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);
Philipp Reisnerbbeb6412011-02-10 13:45:46 +01004587
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004588 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisner360cc742011-02-08 14:29:53 +01004589
Lars Ellenbergf3dfa402011-05-02 10:45:05 +02004590 if (oc == C_DISCONNECTING)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004591 conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
Philipp Reisner360cc742011-02-08 14:29:53 +01004592}
4593
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004594static int drbd_disconnected(struct drbd_peer_device *peer_device)
Philipp Reisner360cc742011-02-08 14:29:53 +01004595{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004596 struct drbd_device *device = peer_device->device;
Philipp Reisner360cc742011-02-08 14:29:53 +01004597 unsigned int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004598
Philipp Reisner85719572010-07-21 10:20:17 +02004599 /* wait for current activity to cease. */
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004600 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004601 _drbd_wait_ee_list_empty(device, &device->active_ee);
4602 _drbd_wait_ee_list_empty(device, &device->sync_ee);
4603 _drbd_wait_ee_list_empty(device, &device->read_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02004604 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004605
4606 /* We do not have data structures that would allow us to
4607 * get the rs_pending_cnt down to 0 again.
4608 * * On C_SYNC_TARGET we do not have any data structures describing
4609 * the pending RSDataRequest's we have sent.
4610 * * On C_SYNC_SOURCE there is no data structure that tracks
4611 * the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
4612 * And no, it is not the sum of the reference counts in the
4613 * resync_LRU. The resync_LRU tracks the whole operation including
4614 * the disk-IO, while the rs_pending_cnt only tracks the blocks
4615 * on the fly. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004616 drbd_rs_cancel_all(device);
4617 device->rs_total = 0;
4618 device->rs_failed = 0;
4619 atomic_set(&device->rs_pending_cnt, 0);
4620 wake_up(&device->misc_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004621
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004622 del_timer_sync(&device->resync_timer);
4623 resync_timer_fn((unsigned long)device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004624
Philipp Reisnerb411b362009-09-25 16:07:19 -07004625 /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
4626 * w_make_resync_request etc. which may still be on the worker queue
4627 * to be "canceled" */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004628 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004629
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004630 drbd_finish_peer_reqs(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004631
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004632 /* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
4633 might have issued a work again. The one before drbd_finish_peer_reqs() is
4634 necessary to reclain net_ee in drbd_finish_peer_reqs(). */
Andreas Gruenbacherb5043c52011-07-28 15:56:02 +02004635 drbd_flush_workqueue(&peer_device->connection->sender_work);
Philipp Reisnerd10b4ea2011-11-30 23:25:36 +01004636
Lars Ellenberg08332d72012-08-17 15:09:13 +02004637 /* need to do it again, drbd_finish_peer_reqs() may have populated it
4638 * again via drbd_try_clear_on_disk_bm(). */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004639 drbd_rs_cancel_all(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004640
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004641 kfree(device->p_uuid);
4642 device->p_uuid = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004643
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004644 if (!drbd_suspended(device))
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004645 tl_clear(peer_device->connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004646
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004647 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004648
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004649 /* serialize with bitmap writeout triggered by the state change,
4650 * if any. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004651 wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004652
Philipp Reisnerb411b362009-09-25 16:07:19 -07004653 /* tcp_close and release of sendpage pages can be deferred. I don't
4654 * want to use SO_LINGER, because apparently it can be deferred for
4655 * more than 20 seconds (longest time I checked).
4656 *
4657 * Actually we don't care for exactly when the network stack does its
4658 * put_page(), but release our reference on these pages right here.
4659 */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004660 i = drbd_free_peer_reqs(device, &device->net_ee);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004661 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004662 drbd_info(device, "net_ee not empty, killed %u entries\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004663 i = atomic_read(&device->pp_in_use_by_net);
Lars Ellenberg435f0742010-09-06 12:30:25 +02004664 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004665 drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004666 i = atomic_read(&device->pp_in_use);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004667 if (i)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004668 drbd_info(device, "pp_in_use = %d, expected 0\n", i);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004669
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004670 D_ASSERT(device, list_empty(&device->read_ee));
4671 D_ASSERT(device, list_empty(&device->active_ee));
4672 D_ASSERT(device, list_empty(&device->sync_ee));
4673 D_ASSERT(device, list_empty(&device->done_ee));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004674
Philipp Reisner360cc742011-02-08 14:29:53 +01004675 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004676}
4677
4678/*
4679 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4680 * we can agree on is stored in agreed_pro_version.
4681 *
4682 * feature flags and the reserved array should be enough room for future
4683 * enhancements of the handshake protocol, and possible plugins...
4684 *
4685 * for now, they are expected to be zero, but ignored.
4686 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004687static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004688{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004689 struct drbd_socket *sock;
4690 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004691
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004692 sock = &connection->data;
4693 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004694 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004695 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004696 memset(p, 0, sizeof(*p));
4697 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4698 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004699 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004700 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004701}
4702
4703/*
4704 * return values:
4705 * 1 yes, we have a valid connection
4706 * 0 oops, did not work out, please try again
4707 * -1 peer talks different language,
4708 * no point in trying again, please go standalone.
4709 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004710static int drbd_do_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004711{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004712 /* ASSERT current == connection->receiver ... */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004713 struct p_connection_features *p;
4714 const int expect = sizeof(struct p_connection_features);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004715 struct packet_info pi;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004716 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004717
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004718 err = drbd_send_features(connection);
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004719 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004720 return 0;
4721
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004722 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004723 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004724 return 0;
4725
Andreas Gruenbacher60381782011-03-28 17:05:50 +02004726 if (pi.cmd != P_CONNECTION_FEATURES) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004727 drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004728 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004729 return -1;
4730 }
4731
Philipp Reisner77351055b2011-02-07 17:24:26 +01004732 if (pi.size != expect) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004733 drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01004734 expect, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004735 return -1;
4736 }
4737
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004738 p = pi.data;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004739 err = drbd_recv_all_warn(connection, p, expect);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004740 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004741 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004742
Philipp Reisnerb411b362009-09-25 16:07:19 -07004743 p->protocol_min = be32_to_cpu(p->protocol_min);
4744 p->protocol_max = be32_to_cpu(p->protocol_max);
4745 if (p->protocol_max == 0)
4746 p->protocol_max = p->protocol_min;
4747
4748 if (PRO_VERSION_MAX < p->protocol_min ||
4749 PRO_VERSION_MIN > p->protocol_max)
4750 goto incompat;
4751
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004752 connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004753 connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004754
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004755 drbd_info(connection, "Handshake successful: "
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004756 "Agreed network protocol version %d\n", connection->agreed_pro_version);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004757
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004758 drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
4759 connection->agreed_features & FF_TRIM ? " " : " not ");
4760
Philipp Reisnerb411b362009-09-25 16:07:19 -07004761 return 1;
4762
4763 incompat:
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004764 drbd_err(connection, "incompatible DRBD dialects: "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004765 "I support %d-%d, peer supports %d-%d\n",
4766 PRO_VERSION_MIN, PRO_VERSION_MAX,
4767 p->protocol_min, p->protocol_max);
4768 return -1;
4769}
4770
4771#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004772static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004773{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004774 drbd_err(connection, "This kernel was build without CONFIG_CRYPTO_HMAC.\n");
4775 drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004776 return -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004777}
4778#else
4779#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004780
4781/* Return value:
4782 1 - auth succeeded,
4783 0 - failed, try again (network error),
4784 -1 - auth failed, don't try again.
4785*/
4786
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004787static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004788{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004789 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004790 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4791 struct scatterlist sg;
4792 char *response = NULL;
4793 char *right_response = NULL;
4794 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004795 unsigned int key_len;
4796 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004797 unsigned int resp_size;
4798 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004799 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004800 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004801 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004802
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004803 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4804
Philipp Reisner44ed1672011-04-19 17:10:19 +02004805 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004806 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02004807 key_len = strlen(nc->shared_secret);
4808 memcpy(secret, nc->shared_secret, key_len);
4809 rcu_read_unlock();
4810
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004811 desc.tfm = connection->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004812 desc.flags = 0;
4813
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004814 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004815 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004816 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004817 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004818 goto fail;
4819 }
4820
4821 get_random_bytes(my_challenge, CHALLENGE_LEN);
4822
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004823 sock = &connection->data;
4824 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004825 rv = 0;
4826 goto fail;
4827 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004828 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004829 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004830 if (!rv)
4831 goto fail;
4832
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004833 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004834 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004835 rv = 0;
4836 goto fail;
4837 }
4838
Philipp Reisner77351055b2011-02-07 17:24:26 +01004839 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004840 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004841 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004842 rv = 0;
4843 goto fail;
4844 }
4845
Philipp Reisner77351055b2011-02-07 17:24:26 +01004846 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004847 drbd_err(connection, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004848 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004849 goto fail;
4850 }
4851
Philipp Reisner67cca282014-04-28 18:43:30 +02004852 if (pi.size < CHALLENGE_LEN) {
4853 drbd_err(connection, "AuthChallenge payload too small.\n");
4854 rv = -1;
4855 goto fail;
4856 }
4857
Philipp Reisner77351055b2011-02-07 17:24:26 +01004858 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004859 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004860 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004861 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004862 goto fail;
4863 }
4864
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004865 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004866 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004867 rv = 0;
4868 goto fail;
4869 }
4870
Philipp Reisner67cca282014-04-28 18:43:30 +02004871 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4872 drbd_err(connection, "Peer presented the same challenge!\n");
4873 rv = -1;
4874 goto fail;
4875 }
4876
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004877 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004878 response = kmalloc(resp_size, GFP_NOIO);
4879 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004880 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004881 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004882 goto fail;
4883 }
4884
4885 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004886 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004887
4888 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4889 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004890 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004891 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004892 goto fail;
4893 }
4894
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004895 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004896 rv = 0;
4897 goto fail;
4898 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004899 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004900 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004901 if (!rv)
4902 goto fail;
4903
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004904 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004905 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004906 rv = 0;
4907 goto fail;
4908 }
4909
Philipp Reisner77351055b2011-02-07 17:24:26 +01004910 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004911 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004912 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004913 rv = 0;
4914 goto fail;
4915 }
4916
Philipp Reisner77351055b2011-02-07 17:24:26 +01004917 if (pi.size != resp_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004918 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004919 rv = 0;
4920 goto fail;
4921 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004922
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004923 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004924 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004925 rv = 0;
4926 goto fail;
4927 }
4928
4929 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004930 if (right_response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004931 drbd_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004932 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004933 goto fail;
4934 }
4935
4936 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4937
4938 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4939 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004940 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004941 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004942 goto fail;
4943 }
4944
4945 rv = !memcmp(response, right_response, resp_size);
4946
4947 if (rv)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004948 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02004949 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004950 else
4951 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004952
4953 fail:
4954 kfree(peers_ch);
4955 kfree(response);
4956 kfree(right_response);
4957
4958 return rv;
4959}
4960#endif
4961
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02004962int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004963{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004964 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004965 int h;
4966
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004967 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004968
4969 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004970 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004971 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004972 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004973 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004974 }
4975 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004976 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004977 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004978 }
4979 } while (h == 0);
4980
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004981 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004982 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004983
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004984 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004985
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004986 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004987 return 0;
4988}
4989
4990/* ********* acknowledge sender ******** */
4991
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004992static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004993{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004994 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004995 int retcode = be32_to_cpu(p->retcode);
4996
4997 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004998 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004999 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005000 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005001 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005002 drbd_set_st_err_str(retcode), retcode);
5003 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005004 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005005
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005006 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005007}
5008
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005009static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005010{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005011 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005012 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005013 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005014 int retcode = be32_to_cpu(p->retcode);
5015
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005016 peer_device = conn_peer_device(connection, pi->vnr);
5017 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005018 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005019 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005020
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005021 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005022 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005023 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01005024 }
5025
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005026 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005027 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005028 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005029 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005030 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005031 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005032 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005033 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005034
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005035 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005036}
5037
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005038static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005039{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005040 return drbd_send_ping_ack(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005041
5042}
5043
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005044static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005045{
5046 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005047 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5048 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5049 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005050
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005051 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005052}
5053
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005054static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005055{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005056 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005057 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005058 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005059 sector_t sector = be64_to_cpu(p->sector);
5060 int blksize = be32_to_cpu(p->blksize);
5061
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005062 peer_device = conn_peer_device(connection, pi->vnr);
5063 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005064 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005065 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005066
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005067 D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005068
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005069 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005070
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005071 if (get_ldev(device)) {
5072 drbd_rs_complete_io(device, sector);
5073 drbd_set_in_sync(device, sector, blksize);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005074 /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005075 device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
5076 put_ldev(device);
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005077 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005078 dec_rs_pending(device);
5079 atomic_add(blksize >> 9, &device->rs_sect_in);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005080
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005081 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005082}
5083
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005084static int
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005085validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
Andreas Gruenbacherbc9c5c42011-01-21 18:00:55 +01005086 struct rb_root *root, const char *func,
5087 enum drbd_req_event what, bool missing_ok)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005088{
5089 struct drbd_request *req;
5090 struct bio_and_error m;
5091
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005092 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005093 req = find_request(device, root, id, sector, missing_ok, func);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005094 if (unlikely(!req)) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005095 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005096 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005097 }
5098 __req_mod(req, what, &m);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005099 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005100
5101 if (m.bio)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005102 complete_master_bio(device, &m);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005103 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005104}
5105
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005106static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005107{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005108 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005109 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005110 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005111 sector_t sector = be64_to_cpu(p->sector);
5112 int blksize = be32_to_cpu(p->blksize);
5113 enum drbd_req_event what;
5114
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005115 peer_device = conn_peer_device(connection, pi->vnr);
5116 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005117 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005118 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005119
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005120 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005121
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005122 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005123 drbd_set_in_sync(device, sector, blksize);
5124 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005125 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005126 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005127 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005128 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005129 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005130 break;
5131 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005132 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005133 break;
5134 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005135 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005136 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005137 case P_SUPERSEDED:
5138 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005139 break;
5140 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005141 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005142 break;
5143 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005144 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005145 }
5146
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005147 return validate_req_change_req_state(device, p->block_id, sector,
5148 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005149 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005150}
5151
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005152static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005153{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005154 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005155 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005156 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005157 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005158 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005159 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005160
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005161 peer_device = conn_peer_device(connection, pi->vnr);
5162 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005163 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005164 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005165
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005166 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005167
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005168 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005169 dec_rs_pending(device);
5170 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005171 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005172 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005173
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005174 err = validate_req_change_req_state(device, p->block_id, sector,
5175 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005176 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005177 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005178 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5179 The master bio might already be completed, therefore the
5180 request is no longer in the collision hash. */
5181 /* In Protocol B we might already have got a P_RECV_ACK
5182 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005183 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005184 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005185 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005186}
5187
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005188static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005189{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005190 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005191 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005192 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005193 sector_t sector = be64_to_cpu(p->sector);
5194
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005195 peer_device = conn_peer_device(connection, pi->vnr);
5196 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005197 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005198 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005199
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005200 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005201
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005202 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005203 (unsigned long long)sector, be32_to_cpu(p->blksize));
5204
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005205 return validate_req_change_req_state(device, p->block_id, sector,
5206 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005207 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005208}
5209
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005210static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005211{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005212 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005213 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005214 sector_t sector;
5215 int size;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005216 struct p_block_ack *p = pi->data;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005217
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005218 peer_device = conn_peer_device(connection, pi->vnr);
5219 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005220 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005221 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005222
5223 sector = be64_to_cpu(p->sector);
5224 size = be32_to_cpu(p->blksize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005225
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005226 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005227
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005228 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005229
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005230 if (get_ldev_if_state(device, D_FAILED)) {
5231 drbd_rs_complete_io(device, sector);
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005232 switch (pi->cmd) {
Philipp Reisnerd612d302010-12-27 10:53:28 +01005233 case P_NEG_RS_DREPLY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005234 drbd_rs_failed_io(device, sector, size);
Philipp Reisnerd612d302010-12-27 10:53:28 +01005235 case P_RS_CANCEL:
5236 break;
5237 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005238 BUG();
Philipp Reisnerd612d302010-12-27 10:53:28 +01005239 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005240 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005241 }
5242
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005243 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005244}
5245
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005246static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005247{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005248 struct p_barrier_ack *p = pi->data;
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005249 struct drbd_peer_device *peer_device;
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005250 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005251
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005252 tl_release(connection, p->barrier, be32_to_cpu(p->set_size));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005253
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005254 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005255 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5256 struct drbd_device *device = peer_device->device;
5257
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005258 if (device->state.conn == C_AHEAD &&
5259 atomic_read(&device->ap_in_flight) == 0 &&
5260 !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
5261 device->start_resync_timer.expires = jiffies + HZ;
5262 add_timer(&device->start_resync_timer);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005263 }
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005264 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02005265 rcu_read_unlock();
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02005266
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005267 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005268}
5269
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005270static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005271{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005272 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005273 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005274 struct p_block_ack *p = pi->data;
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005275 struct drbd_device_work *dw;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005276 sector_t sector;
5277 int size;
5278
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005279 peer_device = conn_peer_device(connection, pi->vnr);
5280 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005281 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005282 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005283
Philipp Reisnerb411b362009-09-25 16:07:19 -07005284 sector = be64_to_cpu(p->sector);
5285 size = be32_to_cpu(p->blksize);
5286
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005287 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005288
5289 if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005290 drbd_ov_out_of_sync_found(device, sector, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005291 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005292 ov_out_of_sync_print(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005293
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005294 if (!get_ldev(device))
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005295 return 0;
Lars Ellenberg1d53f092010-09-05 01:13:24 +02005296
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005297 drbd_rs_complete_io(device, sector);
5298 dec_rs_pending(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005299
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005300 --device->ov_left;
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005301
5302 /* let's advance progress step marks only for every other megabyte */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005303 if ((device->ov_left & 0x200) == 0x200)
5304 drbd_advance_rs_marks(device, device->ov_left);
Lars Ellenbergea5442a2010-11-05 09:48:01 +01005305
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005306 if (device->ov_left == 0) {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005307 dw = kmalloc(sizeof(*dw), GFP_NOIO);
5308 if (dw) {
5309 dw->w.cb = w_ov_finished;
5310 dw->device = device;
5311 drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005312 } else {
Andreas Gruenbacher84b8c062011-07-28 15:27:51 +02005313 drbd_err(device, "kmalloc(dw) failed.");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005314 ov_out_of_sync_print(device);
5315 drbd_resync_finished(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005316 }
5317 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005318 put_ldev(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005319 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005320}
5321
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005322static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005323{
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005324 return 0;
Philipp Reisner0ced55a2010-04-30 15:26:20 +02005325}
5326
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005327static int connection_finish_peer_reqs(struct drbd_connection *connection)
Philipp Reisner32862ec2011-02-08 16:41:01 +01005328{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005329 struct drbd_peer_device *peer_device;
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005330 int vnr, not_empty = 0;
Philipp Reisner32862ec2011-02-08 16:41:01 +01005331
5332 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005333 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner32862ec2011-02-08 16:41:01 +01005334 flush_signals(current);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005335
5336 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005337 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5338 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005339 kref_get(&device->kref);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005340 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005341 if (drbd_finish_peer_reqs(device)) {
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005342 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005343 return 1;
Philipp Reisnerd3fcb492011-04-13 14:46:05 -07005344 }
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02005345 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005346 rcu_read_lock();
Philipp Reisner082a3432011-03-15 16:05:42 +01005347 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005348 set_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisner082a3432011-03-15 16:05:42 +01005349
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005350 spin_lock_irq(&connection->resource->req_lock);
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02005351 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
5352 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005353 not_empty = !list_empty(&device->done_ee);
Philipp Reisner082a3432011-03-15 16:05:42 +01005354 if (not_empty)
5355 break;
5356 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02005357 spin_unlock_irq(&connection->resource->req_lock);
Philipp Reisnerc141ebd2011-05-05 16:13:10 +02005358 rcu_read_unlock();
Philipp Reisner32862ec2011-02-08 16:41:01 +01005359 } while (not_empty);
5360
5361 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005362}
5363
5364struct asender_cmd {
5365 size_t pkt_size;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005366 int (*fn)(struct drbd_connection *connection, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005367};
5368
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005369static struct asender_cmd asender_tbl[] = {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005370 [P_PING] = { 0, got_Ping },
5371 [P_PING_ACK] = { 0, got_PingAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005372 [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5373 [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
5374 [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck },
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005375 [P_SUPERSEDED] = { sizeof(struct p_block_ack), got_BlockAck },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005376 [P_NEG_ACK] = { sizeof(struct p_block_ack), got_NegAck },
5377 [P_NEG_DREPLY] = { sizeof(struct p_block_ack), got_NegDReply },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005378 [P_NEG_RS_DREPLY] = { sizeof(struct p_block_ack), got_NegRSDReply },
Philipp Reisnerb411b362009-09-25 16:07:19 -07005379 [P_OV_RESULT] = { sizeof(struct p_block_ack), got_OVResult },
5380 [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck },
5381 [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
5382 [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync },
Philipp Reisner02918be2010-08-20 14:35:10 +02005383 [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip },
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005384 [P_RS_CANCEL] = { sizeof(struct p_block_ack), got_NegRSDReply },
5385 [P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
5386 [P_RETRY_WRITE] = { sizeof(struct p_block_ack), got_BlockAck },
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005387};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005388
5389int drbd_asender(struct drbd_thread *thi)
5390{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005391 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005392 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005393 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005394 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005395 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005396 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005397 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005398 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005399 bool ping_timeout_active = false;
5400 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005401 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005402 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005403
Philipp Reisner3990e042013-03-27 14:08:48 +01005404 rv = sched_setscheduler(current, SCHED_RR, &param);
5405 if (rv < 0)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005406 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005407
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005408 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005409 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005410
5411 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005412 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005413 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005414 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005415 ping_int = nc->ping_int;
5416 rcu_read_unlock();
5417
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005418 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5419 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005420 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005421 goto reconnect;
5422 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005423 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005424 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005425 }
5426
Philipp Reisner32862ec2011-02-08 16:41:01 +01005427 /* TODO: conditionally cork; it may hurt latency if we cork without
5428 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005429 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005430 drbd_tcp_cork(connection->meta.socket);
5431 if (connection_finish_peer_reqs(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005432 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005433 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005434 }
5435 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005436 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005437 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005438
5439 /* short circuit, recv_msg would return EINTR anyways. */
5440 if (signal_pending(current))
5441 continue;
5442
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005443 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5444 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005445
5446 flush_signals(current);
5447
5448 /* Note:
5449 * -EINTR (on meta) we got a signal
5450 * -EAGAIN (on meta) rcvtimeo expired
5451 * -ECONNRESET other side closed the connection
5452 * -ERESTARTSYS (on data) we got a signal
5453 * rv < 0 other than above: unexpected error!
5454 * rv == expected: full header or command
5455 * rv < expected: "woken" by signal during receive
5456 * rv == 0 : "connection shut down by peer"
5457 */
5458 if (likely(rv > 0)) {
5459 received += rv;
5460 buf += rv;
5461 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005462 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005463 long t;
5464 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005465 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005466 rcu_read_unlock();
5467
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005468 t = wait_event_timeout(connection->ping_wait,
5469 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005470 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005471 if (t)
5472 break;
5473 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005474 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005475 goto reconnect;
5476 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005477 /* If the data socket received something meanwhile,
5478 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005479 if (time_after(connection->last_received,
5480 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005481 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005482 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005483 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005484 goto reconnect;
5485 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005486 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005487 continue;
5488 } else if (rv == -EINTR) {
5489 continue;
5490 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005491 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005492 goto reconnect;
5493 }
5494
5495 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005496 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005497 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005498 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005499 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005500 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005501 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005502 goto disconnect;
5503 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005504 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005505 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005506 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005507 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005508 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005509 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005510 }
5511 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005512 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005513
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005514 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005515 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005516 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005517 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005518 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005519
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005520 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005521
Philipp Reisner44ed1672011-04-19 17:10:19 +02005522 if (cmd == &asender_tbl[P_PING_ACK]) {
5523 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005524 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005525 ping_timeout_active = false;
5526 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005527
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005528 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005529 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005530 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005531 cmd = NULL;
5532 }
5533 }
5534
5535 if (0) {
5536reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005537 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5538 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005539 }
5540 if (0) {
5541disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005542 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005543 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005544 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005545
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005546 drbd_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005547
5548 return 0;
5549}