blob: c7084188c2ae256edf5e648ad2a594d00557b2a8 [file] [log] [blame]
Philipp Reisnerb411b362009-09-25 16:07:19 -07001/*
2 drbd_receiver.c
3
4 This file is part of DRBD by Philipp Reisner and Lars Ellenberg.
5
6 Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
7 Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
8 Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.
9
10 drbd is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2, or (at your option)
13 any later version.
14
15 drbd is distributed in the hope that it will be useful,
16 but WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 GNU General Public License for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with drbd; see the file COPYING. If not, write to
22 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
23 */
24
25
Philipp Reisnerb411b362009-09-25 16:07:19 -070026#include <linux/module.h>
27
28#include <asm/uaccess.h>
29#include <net/sock.h>
30
Philipp Reisnerb411b362009-09-25 16:07:19 -070031#include <linux/drbd.h>
32#include <linux/fs.h>
33#include <linux/file.h>
34#include <linux/in.h>
35#include <linux/mm.h>
36#include <linux/memcontrol.h>
37#include <linux/mm_inline.h>
38#include <linux/slab.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070039#include <linux/pkt_sched.h>
40#define __KERNEL_SYSCALLS__
41#include <linux/unistd.h>
42#include <linux/vmalloc.h>
43#include <linux/random.h>
Philipp Reisnerb411b362009-09-25 16:07:19 -070044#include <linux/string.h>
45#include <linux/scatterlist.h>
46#include "drbd_int.h"
Andreas Gruenbachera3603a62011-05-30 11:47:37 +020047#include "drbd_protocol.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070048#include "drbd_req.h"
Philipp Reisnerb411b362009-09-25 16:07:19 -070049#include "drbd_vli.h"
50
Lars Ellenberg20c68fd2014-04-28 18:43:25 +020051#define PRO_FEATURES (FF_TRIM)
52
/* Decoded header of one received packet, filled in by the header parser. */
struct packet_info {
	enum drbd_packet cmd;	/* packet type / command code */
	unsigned int size;	/* payload size still to be read from the socket */
	unsigned int vnr;	/* volume number the packet is addressed to */
	void *data;		/* points at the already-received (optional) sub-header */
};
59
/* Result of trying to finish a write epoch; see drbd_may_finish_epoch(). */
enum finish_epoch {
	FE_STILL_LIVE,	/* epoch still has unfinished requests */
	FE_DESTROYED,	/* epoch was freed */
	FE_RECYCLED,	/* epoch object was reused for the next epoch */
};
65
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020066static int drbd_do_features(struct drbd_connection *connection);
67static int drbd_do_auth(struct drbd_connection *connection);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +020068static int drbd_disconnected(struct drbd_peer_device *);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +020069static void conn_wait_active_ee_empty(struct drbd_connection *connection);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +020070static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *, struct drbd_epoch *, enum epoch_event);
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +010071static int e_end_block(struct drbd_work *, int);
Philipp Reisnerb411b362009-09-25 16:07:19 -070072
Philipp Reisnerb411b362009-09-25 16:07:19 -070073
74#define GFP_TRY (__GFP_HIGHMEM | __GFP_NOWARN)
75
Lars Ellenberg45bb9122010-05-14 17:10:48 +020076/*
77 * some helper functions to deal with single linked page lists,
78 * page->private being our "next" pointer.
79 */
80
81/* If at least n pages are linked at head, get n pages off.
82 * Otherwise, don't modify head, and return NULL.
83 * Locking is the responsibility of the caller.
84 */
static struct page *page_chain_del(struct page **head, int n)
{
	struct page *page;
	struct page *tmp;

	BUG_ON(!n);
	BUG_ON(!head);

	page = *head;

	if (!page)
		return NULL;

	/* Walk n pages down the chain.  On loop exit via break, "page" is the
	 * last page of the sublist we hand out and "tmp" is its successor
	 * (the new list head). */
	while (page) {
		tmp = page_chain_next(page);
		if (--n == 0)
			break; /* found sufficient pages */
		if (tmp == NULL)
			/* insufficient pages, don't use any of them. */
			return NULL;
		page = tmp;
	}

	/* add end of list marker for the returned list */
	set_page_private(page, 0);
	/* actual return value, and adjustment of head */
	page = *head;
	*head = tmp;
	return page;
}
115
116/* may be used outside of locks to find the tail of a (usually short)
117 * "private" page chain, before adding it back to a global chain head
118 * with page_chain_add() under a spinlock. */
static struct page *page_chain_tail(struct page *page, int *len)
{
	struct page *next;
	int count = 1;

	/* follow the page->private links until they run out */
	for (next = page_chain_next(page); next; next = page_chain_next(page)) {
		page = next;
		count++;
	}
	if (len)
		*len = count;
	return page;
}
129
130static int page_chain_free(struct page *page)
131{
132 struct page *tmp;
133 int i = 0;
134 page_chain_for_each_safe(page, tmp) {
135 put_page(page);
136 ++i;
137 }
138 return i;
139}
140
/* Prepend the chain [chain_first .. chain_last] to *head.
 * Caller is responsible for locking. */
static void page_chain_add(struct page **head,
		struct page *chain_first, struct page *chain_last)
{
#if 1
	/* paranoia: verify that chain_last really is the tail of the
	 * chain starting at chain_first */
	struct page *tmp;
	tmp = page_chain_tail(chain_first, NULL);
	BUG_ON(tmp != chain_last);
#endif

	/* add chain to head */
	set_page_private(chain_last, (unsigned long)*head);
	*head = chain_first;
}
154
/* Try to get @number pages, first from the DRBD page pool, then freshly
 * from the page allocator.  Returns a page chain on success, NULL if not
 * enough pages were immediately available (caller retries). */
static struct page *__drbd_alloc_pages(struct drbd_device *device,
				       unsigned int number)
{
	struct page *page = NULL;
	struct page *tmp = NULL;
	unsigned int i = 0;

	/* Yes, testing drbd_pp_vacant outside the lock is racy.
	 * So what. It saves a spin_lock. */
	if (drbd_pp_vacant >= number) {
		spin_lock(&drbd_pp_lock);
		page = page_chain_del(&drbd_pp_pool, number);
		if (page)
			drbd_pp_vacant -= number;
		spin_unlock(&drbd_pp_lock);
		if (page)
			return page;
	}

	/* GFP_TRY, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	for (i = 0; i < number; i++) {
		tmp = alloc_page(GFP_TRY);
		if (!tmp)
			break;
		set_page_private(tmp, (unsigned long)page);
		page = tmp;
	}

	if (i == number)
		return page;

	/* Not enough pages immediately available this time.
	 * No need to jump around here, drbd_alloc_pages will retry this
	 * function "soon". */
	if (page) {
		/* give the partially allocated chain back to the pool
		 * instead of freeing it */
		tmp = page_chain_tail(page, NULL);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	return NULL;
}
200
/* Move all peer requests from device->net_ee whose pages are no longer in
 * flight onto @to_be_freed.  Caller must hold the resource req_lock. */
static void reclaim_finished_net_peer_reqs(struct drbd_device *device,
					   struct list_head *to_be_freed)
{
	struct drbd_peer_request *peer_req, *tmp;

	/* The EEs are always appended to the end of the list. Since
	   they are sent in order over the wire, they have to finish
	   in order. As soon as we see the first not finished we can
	   stop to examine the list... */

	list_for_each_entry_safe(peer_req, tmp, &device->net_ee, w.list) {
		if (drbd_peer_req_has_active_page(peer_req))
			break;
		list_move(&peer_req->w.list, to_be_freed);
	}
}
217
/* Collect finished net_ee peer requests under the req_lock, then free
 * them outside the lock. */
static void drbd_kick_lo_and_reclaim_net(struct drbd_device *device)
{
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;

	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	spin_unlock_irq(&device->resource->req_lock);

	/* freeing may sleep/take other locks, so do it unlocked */
	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);
}
230
231/**
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200232 * drbd_alloc_pages() - Returns @number pages, retries forever (or until signalled)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200233 * @device: DRBD device.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200234 * @number: number of pages requested
235 * @retry: whether to retry, if not enough pages are available right now
Philipp Reisnerb411b362009-09-25 16:07:19 -0700236 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200237 * Tries to allocate number pages, first from our own page pool, then from
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200238 * the kernel.
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200239 * Possibly retry until DRBD frees sufficient pages somewhere else.
240 *
Lars Ellenberg0e49d7b2014-04-28 18:43:18 +0200241 * If this allocation would exceed the max_buffers setting, we throttle
242 * allocation (schedule_timeout) to give the system some room to breathe.
243 *
244 * We do not use max-buffers as hard limit, because it could lead to
245 * congestion and further to a distributed deadlock during online-verify or
246 * (checksum based) resync, if the max-buffers, socket buffer sizes and
247 * resync-rate settings are mis-configured.
248 *
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200249 * Returns a page chain linked via page->private.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700250 */
struct page *drbd_alloc_pages(struct drbd_peer_device *peer_device, unsigned int number,
			      bool retry)
{
	struct drbd_device *device = peer_device->device;
	struct page *page = NULL;
	struct net_conf *nc;
	DEFINE_WAIT(wait);
	unsigned int mxb;

	/* snapshot max_buffers from the RCU-protected net config;
	 * fall back to a huge value if not (yet) configured */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);
	mxb = nc ? nc->max_buffers : 1000000;
	rcu_read_unlock();

	/* fast path: below the soft limit, try to allocate right away */
	if (atomic_read(&device->pp_in_use) < mxb)
		page = __drbd_alloc_pages(device, number);

	while (page == NULL) {
		prepare_to_wait(&drbd_pp_wait, &wait, TASK_INTERRUPTIBLE);

		drbd_kick_lo_and_reclaim_net(device);

		if (atomic_read(&device->pp_in_use) < mxb) {
			page = __drbd_alloc_pages(device, number);
			if (page)
				break;
		}

		if (!retry)
			break;

		if (signal_pending(current)) {
			drbd_warn(device, "drbd_alloc_pages interrupted!\n");
			break;
		}

		/* throttle for up to 100ms; if the full timeout expired,
		 * stop honoring max-buffers as a limit (it is a soft limit,
		 * see the function comment above) */
		if (schedule_timeout(HZ/10) == 0)
			mxb = UINT_MAX;
	}
	finish_wait(&drbd_pp_wait, &wait);

	/* account the pages we hand out */
	if (page)
		atomic_add(number, &device->pp_in_use);
	return page;
}
296
Andreas Gruenbacherc37c8ec2011-04-07 21:02:09 +0200297/* Must not be used from irq, as that may deadlock: see drbd_alloc_pages.
Andreas Gruenbacher05008132011-07-07 14:19:42 +0200298 * Is also used from inside an other spin_lock_irq(&resource->req_lock);
Lars Ellenberg45bb9122010-05-14 17:10:48 +0200299 * Either links the page chain back to the global pool,
300 * or returns all pages to the system. */
static void drbd_free_pages(struct drbd_device *device, struct page *page, int is_net)
{
	/* pick the accounting counter matching where the pages were charged */
	atomic_t *a = is_net ? &device->pp_in_use_by_net : &device->pp_in_use;
	int i;

	if (page == NULL)
		return;

	/* if the pool is already well stocked, free to the system;
	 * otherwise link the chain back into the global pool */
	if (drbd_pp_vacant > (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count)
		i = page_chain_free(page);
	else {
		struct page *tmp;
		tmp = page_chain_tail(page, &i);
		spin_lock(&drbd_pp_lock);
		page_chain_add(&drbd_pp_pool, page, tmp);
		drbd_pp_vacant += i;
		spin_unlock(&drbd_pp_lock);
	}
	i = atomic_sub_return(i, a);
	if (i < 0)
		drbd_warn(device, "ASSERTION FAILED: %s: %d < 0\n",
			is_net ? "pp_in_use_by_net" : "pp_in_use", i);
	/* somebody might be waiting in drbd_alloc_pages() */
	wake_up(&drbd_pp_wait);
}
325
326/*
327You need to hold the req_lock:
328 _drbd_wait_ee_list_empty()
329
330You must not have the req_lock:
Andreas Gruenbacher3967deb2011-04-06 16:16:56 +0200331 drbd_free_peer_req()
Andreas Gruenbacher0db55362011-04-06 16:09:15 +0200332 drbd_alloc_peer_req()
Andreas Gruenbacher7721f562011-04-06 17:14:02 +0200333 drbd_free_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700334 drbd_ee_fix_bhs()
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200335 drbd_finish_peer_reqs()
Philipp Reisnerb411b362009-09-25 16:07:19 -0700336 drbd_clear_done_ee()
337 drbd_wait_ee_list_empty()
338*/
339
/* Allocate a peer request object and, if @has_payload and @data_size,
 * the page chain to hold its payload.  Returns NULL on allocation
 * failure or injected fault.  Freed with drbd_free_peer_req(). */
struct drbd_peer_request *
drbd_alloc_peer_req(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
		    unsigned int data_size, bool has_payload, gfp_t gfp_mask) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;
	struct page *page = NULL;
	/* round the payload size up to whole pages */
	unsigned nr_pages = (data_size + PAGE_SIZE -1) >> PAGE_SHIFT;

	/* fault injection hook for testing */
	if (drbd_insert_fault(device, DRBD_FAULT_AL_EE))
		return NULL;

	/* the mempool objects must not come from highmem */
	peer_req = mempool_alloc(drbd_ee_mempool, gfp_mask & ~__GFP_HIGHMEM);
	if (!peer_req) {
		if (!(gfp_mask & __GFP_NOWARN))
			drbd_err(device, "%s: allocation failed\n", __func__);
		return NULL;
	}

	if (has_payload && data_size) {
		page = drbd_alloc_pages(peer_device, nr_pages, (gfp_mask & __GFP_WAIT));
		if (!page)
			goto fail;
	}

	drbd_clear_interval(&peer_req->i);
	peer_req->i.size = data_size;
	peer_req->i.sector = sector;
	peer_req->i.local = false;
	peer_req->i.waiting = false;

	peer_req->epoch = NULL;
	peer_req->peer_device = peer_device;
	peer_req->pages = page;
	atomic_set(&peer_req->pending_bios, 0);
	peer_req->flags = 0;
	/*
	 * The block_id is opaque to the receiver. It is not endianness
	 * converted, and sent back to the sender unchanged.
	 */
	peer_req->block_id = id;

	return peer_req;

 fail:
	mempool_free(peer_req, drbd_ee_mempool);
	return NULL;
}
388
/* Release a peer request: its digest (if any), its page chain, and the
 * object itself.  @is_net selects which in-use counter to credit. */
void __drbd_free_peer_req(struct drbd_device *device, struct drbd_peer_request *peer_req,
		       int is_net)
{
	if (peer_req->flags & EE_HAS_DIGEST)
		kfree(peer_req->digest);
	drbd_free_pages(device, peer_req->pages, is_net);
	/* must not still have bios in flight, nor sit in the interval tree */
	D_ASSERT(device, atomic_read(&peer_req->pending_bios) == 0);
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));
	mempool_free(peer_req, drbd_ee_mempool);
}
399
/* Free all peer requests on @list; returns how many were freed.
 * The list is spliced away under the req_lock, freeing happens unlocked. */
int drbd_free_peer_reqs(struct drbd_device *device, struct list_head *list)
{
	LIST_HEAD(work_list);
	struct drbd_peer_request *peer_req, *t;
	int count = 0;
	/* requests on net_ee are accounted in pp_in_use_by_net */
	int is_net = list == &device->net_ee;

	spin_lock_irq(&device->resource->req_lock);
	list_splice_init(list, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		__drbd_free_peer_req(device, peer_req, is_net);
		count++;
	}
	return count;
}
417
Philipp Reisnerb411b362009-09-25 16:07:19 -0700418/*
Andreas Gruenbachera990be42011-04-06 17:56:48 +0200419 * See also comments in _req_mod(,BARRIER_ACKED) and receive_Barrier.
Philipp Reisnerb411b362009-09-25 16:07:19 -0700420 */
static int drbd_finish_peer_reqs(struct drbd_device *device)
{
	LIST_HEAD(work_list);
	LIST_HEAD(reclaimed);
	struct drbd_peer_request *peer_req, *t;
	int err = 0;

	/* grab both the reclaimable net_ee entries and all done_ee
	 * entries in one locked section */
	spin_lock_irq(&device->resource->req_lock);
	reclaim_finished_net_peer_reqs(device, &reclaimed);
	list_splice_init(&device->done_ee, &work_list);
	spin_unlock_irq(&device->resource->req_lock);

	list_for_each_entry_safe(peer_req, t, &reclaimed, w.list)
		drbd_free_net_peer_req(device, peer_req);

	/* possible callbacks here:
	 * e_end_block, and e_end_resync_block, e_send_superseded.
	 * all ignore the last argument.
	 */
	list_for_each_entry_safe(peer_req, t, &work_list, w.list) {
		int err2;

		/* list_del not necessary, next/prev members not touched */
		err2 = peer_req->w.cb(&peer_req->w, !!err);
		/* remember the first error, but still run all callbacks */
		if (!err)
			err = err2;
		drbd_free_peer_req(device, peer_req);
	}
	wake_up(&device->ee_wait);

	return err;
}
453
/* Wait until @head is empty.  Caller must hold the resource req_lock;
 * the lock is dropped around the actual sleep and re-taken before
 * re-checking and before returning. */
static void _drbd_wait_ee_list_empty(struct drbd_device *device,
				     struct list_head *head)
{
	DEFINE_WAIT(wait);

	/* avoids spin_lock/unlock
	 * and calling prepare_to_wait in the fast path */
	while (!list_empty(head)) {
		prepare_to_wait(&device->ee_wait, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irq(&device->resource->req_lock);
		io_schedule();
		finish_wait(&device->ee_wait, &wait);
		spin_lock_irq(&device->resource->req_lock);
	}
}
469
/* Locked wrapper around _drbd_wait_ee_list_empty() for callers that do
 * not already hold the req_lock. */
static void drbd_wait_ee_list_empty(struct drbd_device *device,
				    struct list_head *head)
{
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, head);
	spin_unlock_irq(&device->resource->req_lock);
}
477
/* Receive up to @size bytes from @sock into @buf.
 * With @flags == 0, blocks until the full @size arrived (MSG_WAITALL).
 * Returns bytes received, 0 on orderly shutdown, or a negative errno. */
static int drbd_recv_short(struct socket *sock, void *buf, size_t size, int flags)
{
	struct kvec iov = {
		.iov_base = buf,
		.iov_len = size,
	};
	struct msghdr msg = {
		.msg_flags = (flags ? flags : MSG_WAITALL | MSG_NOSIGNAL)
	};
	return kernel_recvmsg(sock, &msg, &iov, 1, size, msg.msg_flags);
}
489
/* Receive @size bytes on the data socket, logging and driving the
 * connection state machine on errors and peer shutdown. */
static int drbd_recv(struct drbd_connection *connection, void *buf, size_t size)
{
	int rv;

	rv = drbd_recv_short(connection->data.socket, buf, size, 0);

	if (rv < 0) {
		if (rv == -ECONNRESET)
			drbd_info(connection, "sock was reset by peer\n");
		else if (rv != -ERESTARTSYS)
			drbd_err(connection, "sock_recvmsg returned %d\n", rv);
	} else if (rv == 0) {
		/* EOF: if we initiated the disconnect ourselves, give the
		 * state machine a chance to leave C_WF_REPORT_PARAMS before
		 * we treat this as a broken pipe */
		if (test_bit(DISCONNECT_SENT, &connection->flags)) {
			long t;

			rcu_read_lock();
			t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
			rcu_read_unlock();

			t = wait_event_timeout(connection->ping_wait, connection->cstate < C_WF_REPORT_PARAMS, t);

			if (t)
				goto out;
		}
		drbd_info(connection, "sock was shut down by peer\n");
	}

	/* anything short of a full read means the pipe is broken */
	if (rv != size)
		conn_request_state(connection, NS(conn, C_BROKEN_PIPE), CS_HARD);

out:
	return rv;
}
522
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200523static int drbd_recv_all(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100524{
525 int err;
526
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200527 err = drbd_recv(connection, buf, size);
Andreas Gruenbacherc6967742011-03-17 17:15:20 +0100528 if (err != size) {
529 if (err >= 0)
530 err = -EIO;
531 } else
532 err = 0;
533 return err;
534}
535
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200536static int drbd_recv_all_warn(struct drbd_connection *connection, void *buf, size_t size)
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100537{
538 int err;
539
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200540 err = drbd_recv_all(connection, buf, size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100541 if (err && !signal_pending(current))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200542 drbd_warn(connection, "short read (expected size %d)\n", (int)size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +0100543 return err;
544}
545
Lars Ellenberg5dbf1672010-05-25 16:18:01 +0200546/* quoting tcp(7):
547 * On individual connections, the socket buffer size must be set prior to the
548 * listen(2) or connect(2) calls in order to have it take effect.
549 * This is our wrapper to do so.
550 */
static void drbd_setbufsize(struct socket *sock, unsigned int snd,
		unsigned int rcv)
{
	/* open coded SO_SNDBUF, SO_RCVBUF */
	/* a size of 0 means "leave the kernel default / auto-tuning alone";
	 * SOCK_*_LOCK pins the value against kernel auto-tuning */
	if (snd) {
		sock->sk->sk_sndbuf = snd;
		sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
	}
	if (rcv) {
		sock->sk->sk_rcvbuf = rcv;
		sock->sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
	}
}
564
/* Attempt one outgoing TCP connection to the peer.
 * Returns the connected socket, or NULL on failure.  "Expected" failures
 * (timeout, refused, unreachable, ...) are tolerated quietly; unexpected
 * errors force the connection state to C_DISCONNECTING. */
static struct socket *drbd_try_connect(struct drbd_connection *connection)
{
	const char *what;	/* which step failed, for the error message */
	struct socket *sock;
	struct sockaddr_in6 src_in6;	/* big enough for both v4 and v6 */
	struct sockaddr_in6 peer_in6;
	struct net_conf *nc;
	int err, peer_addr_len, my_addr_len;
	int sndbuf_size, rcvbuf_size, connect_int;
	int disconnect_on_error = 1;

	/* copy what we need out of the RCU-protected net config */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);
	if (!nc) {
		rcu_read_unlock();
		return NULL;
	}
	sndbuf_size = nc->sndbuf_size;
	rcvbuf_size = nc->rcvbuf_size;
	connect_int = nc->connect_int;
	rcu_read_unlock();

	my_addr_len = min_t(int, connection->my_addr_len, sizeof(src_in6));
	memcpy(&src_in6, &connection->my_addr, my_addr_len);

	if (((struct sockaddr *)&connection->my_addr)->sa_family == AF_INET6)
		src_in6.sin6_port = 0;
	else
		((struct sockaddr_in *)&src_in6)->sin_port = 0; /* AF_INET & AF_SCI */

	peer_addr_len = min_t(int, connection->peer_addr_len, sizeof(src_in6));
	memcpy(&peer_in6, &connection->peer_addr, peer_addr_len);

	what = "sock_create_kern";
	err = sock_create_kern(((struct sockaddr *)&src_in6)->sa_family,
			       SOCK_STREAM, IPPROTO_TCP, &sock);
	if (err < 0) {
		sock = NULL;
		goto out;
	}

	sock->sk->sk_rcvtimeo =
	sock->sk->sk_sndtimeo = connect_int * HZ;
	drbd_setbufsize(sock, sndbuf_size, rcvbuf_size);

	/* explicitly bind to the configured IP as source IP
	 * for the outgoing connections.
	 * This is needed for multihomed hosts and to be
	 * able to use lo: interfaces for drbd.
	 * Make sure to use 0 as port number, so linux selects
	 * a free one dynamically.
	 */
	what = "bind before connect";
	err = sock->ops->bind(sock, (struct sockaddr *) &src_in6, my_addr_len);
	if (err < 0)
		goto out;

	/* connect may fail, peer not yet available.
	 * stay C_WF_CONNECTION, don't go Disconnecting! */
	disconnect_on_error = 0;
	what = "connect";
	err = sock->ops->connect(sock, (struct sockaddr *) &peer_in6, peer_addr_len, 0);

out:
	if (err < 0) {
		if (sock) {
			sock_release(sock);
			sock = NULL;
		}
		switch (-err) {
		/* timeout, busy, signal pending */
		case ETIMEDOUT: case EAGAIN: case EINPROGRESS:
		case EINTR: case ERESTARTSYS:
		/* peer not (yet) available, network problem */
		case ECONNREFUSED: case ENETUNREACH:
		case EHOSTDOWN: case EHOSTUNREACH:
			disconnect_on_error = 0;
			break;
		default:
			drbd_err(connection, "%s failed, err = %d\n", what, err);
		}
		if (disconnect_on_error)
			conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
	}

	return sock;
}
652
/* Context attached to the listen socket (via sk_user_data) so that the
 * sk_state_change callback can signal the receiver waiting in
 * drbd_wait_for_connect() when a peer connection arrives. */
struct accept_wait_data {
	struct drbd_connection *connection;	/* connection being established */
	struct socket *s_listen;		/* the listening socket */
	struct completion door_bell;		/* completed when an incoming connection reaches ESTABLISHED */
	void (*original_sk_state_change)(struct sock *sk);	/* saved callback, restored via unregister_state_change() */

};
660
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200661static void drbd_incoming_connection(struct sock *sk)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700662{
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200663 struct accept_wait_data *ad = sk->sk_user_data;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200664 void (*state_change)(struct sock *sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200665
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200666 state_change = ad->original_sk_state_change;
667 if (sk->sk_state == TCP_ESTABLISHED)
668 complete(&ad->door_bell);
669 state_change(sk);
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200670}
671
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200672static int prepare_listen_socket(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700673{
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200674 int err, sndbuf_size, rcvbuf_size, my_addr_len;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200675 struct sockaddr_in6 my_addr;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200676 struct socket *s_listen;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200677 struct net_conf *nc;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700678 const char *what;
679
Philipp Reisner44ed1672011-04-19 17:10:19 +0200680 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200681 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +0200682 if (!nc) {
683 rcu_read_unlock();
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200684 return -EIO;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200685 }
Philipp Reisner44ed1672011-04-19 17:10:19 +0200686 sndbuf_size = nc->sndbuf_size;
687 rcvbuf_size = nc->rcvbuf_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +0200688 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -0700689
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200690 my_addr_len = min_t(int, connection->my_addr_len, sizeof(struct sockaddr_in6));
691 memcpy(&my_addr, &connection->my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700692
693 what = "sock_create_kern";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200694 err = sock_create_kern(((struct sockaddr *)&my_addr)->sa_family,
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200695 SOCK_STREAM, IPPROTO_TCP, &s_listen);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700696 if (err) {
697 s_listen = NULL;
698 goto out;
699 }
700
Philipp Reisner98683652012-11-09 14:18:43 +0100701 s_listen->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
Philipp Reisner44ed1672011-04-19 17:10:19 +0200702 drbd_setbufsize(s_listen, sndbuf_size, rcvbuf_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700703
704 what = "bind before listen";
Philipp Reisner44ed1672011-04-19 17:10:19 +0200705 err = s_listen->ops->bind(s_listen, (struct sockaddr *)&my_addr, my_addr_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700706 if (err < 0)
707 goto out;
708
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200709 ad->s_listen = s_listen;
710 write_lock_bh(&s_listen->sk->sk_callback_lock);
711 ad->original_sk_state_change = s_listen->sk->sk_state_change;
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200712 s_listen->sk->sk_state_change = drbd_incoming_connection;
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200713 s_listen->sk->sk_user_data = ad;
714 write_unlock_bh(&s_listen->sk->sk_callback_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700715
Philipp Reisner2820fd32012-07-12 10:22:48 +0200716 what = "listen";
717 err = s_listen->ops->listen(s_listen, 5);
718 if (err < 0)
719 goto out;
720
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200721 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700722out:
723 if (s_listen)
724 sock_release(s_listen);
725 if (err < 0) {
726 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200727 drbd_err(connection, "%s failed, err = %d\n", what, err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200728 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700729 }
730 }
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200731
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200732 return -EIO;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200733}
734
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200735static void unregister_state_change(struct sock *sk, struct accept_wait_data *ad)
736{
737 write_lock_bh(&sk->sk_callback_lock);
738 sk->sk_state_change = ad->original_sk_state_change;
739 sk->sk_user_data = NULL;
740 write_unlock_bh(&sk->sk_callback_lock);
741}
742
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200743static struct socket *drbd_wait_for_connect(struct drbd_connection *connection, struct accept_wait_data *ad)
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200744{
745 int timeo, connect_int, err = 0;
746 struct socket *s_estab = NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200747 struct net_conf *nc;
748
749 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200750 nc = rcu_dereference(connection->net_conf);
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200751 if (!nc) {
752 rcu_read_unlock();
753 return NULL;
754 }
755 connect_int = nc->connect_int;
756 rcu_read_unlock();
757
758 timeo = connect_int * HZ;
Akinobu Mita38b682b22013-04-29 16:21:31 -0700759 /* 28.5% random jitter */
760 timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200761
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200762 err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
763 if (err <= 0)
764 return NULL;
Philipp Reisner1f3e5092012-07-12 11:08:34 +0200765
Philipp Reisner7a426fd2012-07-12 14:22:37 +0200766 err = kernel_accept(ad->s_listen, &s_estab, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700767 if (err < 0) {
768 if (err != -EAGAIN && err != -EINTR && err != -ERESTARTSYS) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +0200769 drbd_err(connection, "accept failed, err = %d\n", err);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200770 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700771 }
772 }
Philipp Reisnerb411b362009-09-25 16:07:19 -0700773
Andreas Gruenbacher715306f2012-08-10 17:00:30 +0200774 if (s_estab)
775 unregister_state_change(s_estab->sk, ad);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700776
777 return s_estab;
778}
779
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200780static int decode_header(struct drbd_connection *, void *, struct packet_info *);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700781
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200782static int send_first_packet(struct drbd_connection *connection, struct drbd_socket *sock,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200783 enum drbd_packet cmd)
784{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200785 if (!conn_prepare_command(connection, sock))
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200786 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200787 return conn_send_command(connection, sock, cmd, 0, NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700788}
789
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200790static int receive_first_packet(struct drbd_connection *connection, struct socket *sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700791{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200792 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200793 struct packet_info pi;
794 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700795
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200796 err = drbd_recv_short(sock, connection->data.rbuf, header_size, 0);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200797 if (err != header_size) {
798 if (err >= 0)
799 err = -EIO;
800 return err;
801 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +0200802 err = decode_header(connection, connection->data.rbuf, &pi);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +0200803 if (err)
804 return err;
805 return pi.cmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700806}
807
808/**
809 * drbd_socket_okay() - Free the socket if its connection is not okay
Philipp Reisnerb411b362009-09-25 16:07:19 -0700810 * @sock: pointer to the pointer to the socket.
811 */
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100812static int drbd_socket_okay(struct socket **sock)
Philipp Reisnerb411b362009-09-25 16:07:19 -0700813{
814 int rr;
815 char tb[4];
816
817 if (!*sock)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100818 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700819
Philipp Reisnerdbd9eea2011-02-07 15:34:16 +0100820 rr = drbd_recv_short(*sock, tb, 4, MSG_DONTWAIT | MSG_PEEK);
Philipp Reisnerb411b362009-09-25 16:07:19 -0700821
822 if (rr > 0 || rr == -EAGAIN) {
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100823 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700824 } else {
825 sock_release(*sock);
826 *sock = NULL;
Andreas Gruenbacher81e84652010-12-09 15:03:57 +0100827 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -0700828 }
829}
Philipp Reisner2325eb62011-03-15 16:56:18 +0100830/* Gets called if a connection is established, or if a new minor gets created
831 in a connection */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200832int drbd_connected(struct drbd_peer_device *peer_device)
Philipp Reisner907599e2011-02-08 11:25:37 +0100833{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200834 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100835 int err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100836
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200837 atomic_set(&device->packet_seq, 0);
838 device->peer_seq = 0;
Philipp Reisner907599e2011-02-08 11:25:37 +0100839
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200840 device->state_mutex = peer_device->connection->agreed_pro_version < 100 ?
841 &peer_device->connection->cstate_mutex :
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200842 &device->own_state_mutex;
Philipp Reisner8410da82011-02-11 20:11:10 +0100843
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200844 err = drbd_send_sync_param(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100845 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200846 err = drbd_send_sizes(peer_device, 0, 0);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100847 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200848 err = drbd_send_uuids(peer_device);
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100849 if (!err)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +0200850 err = drbd_send_current_state(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +0200851 clear_bit(USE_DEGR_WFC_T, &device->flags);
852 clear_bit(RESIZE_PENDING, &device->flags);
853 atomic_set(&device->ap_in_flight, 0);
854 mod_timer(&device->request_timer, jiffies + HZ); /* just start it here. */
Andreas Gruenbacher0829f5e2011-03-24 14:31:22 +0100855 return err;
Philipp Reisner907599e2011-02-08 11:25:37 +0100856}
Philipp Reisnerb411b362009-09-25 16:07:19 -0700857
/*
 * return values:
 *   1 yes, we have a valid connection
 *   0 oops, did not work out, please try again
 *  -1 peer talks different language,
 *     no point in trying again, please go standalone.
 *  -2 We do not have a network config...
 */
static int conn_connect(struct drbd_connection *connection)
{
	struct drbd_socket sock, msock;
	struct drbd_peer_device *peer_device;
	struct net_conf *nc;
	int vnr, timeout, h, ok;
	bool discard_my_data;
	enum drbd_state_rv rv;
	struct accept_wait_data ad = {
		.connection = connection,
		.door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell),
	};

	clear_bit(DISCONNECT_SENT, &connection->flags);
	if (conn_request_state(connection, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS)
		return -2;

	/* Two sockets per connection: "sock" carries data, "msock" meta/acks. */
	mutex_init(&sock.mutex);
	sock.sbuf = connection->data.sbuf;
	sock.rbuf = connection->data.rbuf;
	sock.socket = NULL;
	mutex_init(&msock.mutex);
	msock.sbuf = connection->meta.sbuf;
	msock.rbuf = connection->meta.rbuf;
	msock.socket = NULL;

	/* Assume that the peer only understands protocol 80 until we know better.  */
	connection->agreed_pro_version = 80;

	if (prepare_listen_socket(connection, &ad))
		return 0;

	/* Both peers connect out and accept in simultaneously; the first
	 * packet on each socket (P_INITIAL_DATA / P_INITIAL_META) decides
	 * which role it plays.  Loop until both sockets are established. */
	do {
		struct socket *s;

		s = drbd_try_connect(connection);
		if (s) {
			if (!sock.socket) {
				sock.socket = s;
				send_first_packet(connection, &sock, P_INITIAL_DATA);
			} else if (!msock.socket) {
				clear_bit(RESOLVE_CONFLICTS, &connection->flags);
				msock.socket = s;
				send_first_packet(connection, &msock, P_INITIAL_META);
			} else {
				drbd_err(connection, "Logic error in conn_connect()\n");
				goto out_release_sockets;
			}
		}

		if (sock.socket && msock.socket) {
			/* give the peer a moment, then re-verify both sockets */
			rcu_read_lock();
			nc = rcu_dereference(connection->net_conf);
			timeout = nc->ping_timeo * HZ / 10;
			rcu_read_unlock();
			schedule_timeout_interruptible(timeout);
			ok = drbd_socket_okay(&sock.socket);
			ok = drbd_socket_okay(&msock.socket) && ok;
			if (ok)
				break;
		}

retry:
		s = drbd_wait_for_connect(connection, &ad);
		if (s) {
			int fp = receive_first_packet(connection, s);
			drbd_socket_okay(&sock.socket);
			drbd_socket_okay(&msock.socket);
			switch (fp) {
			case P_INITIAL_DATA:
				if (sock.socket) {
					/* both sides connected out at once; keep the accepted one */
					drbd_warn(connection, "initial packet S crossed\n");
					sock_release(sock.socket);
					sock.socket = s;
					goto randomize;
				}
				sock.socket = s;
				break;
			case P_INITIAL_META:
				set_bit(RESOLVE_CONFLICTS, &connection->flags);
				if (msock.socket) {
					drbd_warn(connection, "initial packet M crossed\n");
					sock_release(msock.socket);
					msock.socket = s;
					goto randomize;
				}
				msock.socket = s;
				break;
			default:
				drbd_warn(connection, "Error receiving initial packet\n");
				sock_release(s);
randomize:
				/* break symmetry: retry immediately only half the time */
				if (prandom_u32() & 1)
					goto retry;
			}
		}

		if (connection->cstate <= C_DISCONNECTING)
			goto out_release_sockets;
		if (signal_pending(current)) {
			flush_signals(current);
			smp_rmb();
			if (get_t_state(&connection->receiver) == EXITING)
				goto out_release_sockets;
		}

		ok = drbd_socket_okay(&sock.socket);
		ok = drbd_socket_okay(&msock.socket) && ok;
	} while (!ok);

	/* both sockets established; the listen socket is no longer needed */
	if (ad.s_listen)
		sock_release(ad.s_listen);

	sock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */
	msock.socket->sk->sk_reuse = SK_CAN_REUSE; /* SO_REUSEADDR */

	sock.socket->sk->sk_allocation = GFP_NOIO;
	msock.socket->sk->sk_allocation = GFP_NOIO;

	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;

	/* NOT YET ...
	 * sock.socket->sk->sk_sndtimeo = connection->net_conf->timeout*HZ/10;
	 * sock.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
	 * first set it to the P_CONNECTION_FEATURES timeout,
	 * which we set to 4x the configured ping_timeout. */
	rcu_read_lock();
	nc = rcu_dereference(connection->net_conf);

	sock.socket->sk->sk_sndtimeo =
	sock.socket->sk->sk_rcvtimeo = nc->ping_timeo*4*HZ/10;

	msock.socket->sk->sk_rcvtimeo = nc->ping_int*HZ;
	timeout = nc->timeout * HZ / 10;
	discard_my_data = nc->discard_my_data;
	rcu_read_unlock();

	msock.socket->sk->sk_sndtimeo = timeout;

	/* we don't want delays.
	 * we use TCP_CORK where appropriate, though */
	drbd_tcp_nodelay(sock.socket);
	drbd_tcp_nodelay(msock.socket);

	connection->data.socket = sock.socket;
	connection->meta.socket = msock.socket;
	connection->last_received = jiffies;

	h = drbd_do_features(connection);
	if (h <= 0)
		return h;

	if (connection->cram_hmac_tfm) {
		/* drbd_request_state(device, NS(conn, WFAuth)); */
		switch (drbd_do_auth(connection)) {
		case -1:
			drbd_err(connection, "Authentication of peer failed\n");
			return -1;
		case 0:
			drbd_err(connection, "Authentication of peer failed, trying again.\n");
			return 0;
		}
	}

	connection->data.socket->sk->sk_sndtimeo = timeout;
	connection->data.socket->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;

	if (drbd_send_protocol(connection) == -EOPNOTSUPP)
		return -1;

	/* Prevent a race between resync-handshake and
	 * being promoted to Primary.
	 *
	 * Grab and release the state mutex, so we know that any current
	 * drbd_set_role() is finished, and any incoming drbd_set_role
	 * will see the STATE_SENT flag, and wait for it to be cleared.
	 */
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_lock(peer_device->device->state_mutex);

	set_bit(STATE_SENT, &connection->flags);

	idr_for_each_entry(&connection->peer_devices, peer_device, vnr)
		mutex_unlock(peer_device->device->state_mutex);

	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		/* kref pins the device while we drop the RCU lock to call
		 * functions that may sleep */
		kref_get(&device->kref);
		rcu_read_unlock();

		if (discard_my_data)
			set_bit(DISCARD_MY_DATA, &device->flags);
		else
			clear_bit(DISCARD_MY_DATA, &device->flags);

		drbd_connected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	rv = conn_request_state(connection, NS(conn, C_WF_REPORT_PARAMS), CS_VERBOSE);
	if (rv < SS_SUCCESS || connection->cstate != C_WF_REPORT_PARAMS) {
		clear_bit(STATE_SENT, &connection->flags);
		return 0;
	}

	drbd_thread_start(&connection->asender);

	mutex_lock(&connection->resource->conf_update);
	/* The discard_my_data flag is a single-shot modifier to the next
	 * connection attempt, the handshake of which is now well underway.
	 * No need for rcu style copying of the whole struct
	 * just to clear a single value. */
	connection->net_conf->discard_my_data = 0;
	mutex_unlock(&connection->resource->conf_update);

	return h;

out_release_sockets:
	if (ad.s_listen)
		sock_release(ad.s_listen);
	if (sock.socket)
		sock_release(sock.socket);
	if (msock.socket)
		sock_release(msock.socket);
	return -1;
}
1096
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001097static int decode_header(struct drbd_connection *connection, void *header, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001098{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001099 unsigned int header_size = drbd_header_size(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001100
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001101 if (header_size == sizeof(struct p_header100) &&
1102 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC_100)) {
1103 struct p_header100 *h = header;
1104 if (h->pad != 0) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001105 drbd_err(connection, "Header padding is not zero\n");
Andreas Gruenbacher0c8e36d2011-03-30 16:00:17 +02001106 return -EINVAL;
1107 }
1108 pi->vnr = be16_to_cpu(h->volume);
1109 pi->cmd = be16_to_cpu(h->command);
1110 pi->size = be32_to_cpu(h->length);
1111 } else if (header_size == sizeof(struct p_header95) &&
1112 *(__be16 *)header == cpu_to_be16(DRBD_MAGIC_BIG)) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001113 struct p_header95 *h = header;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001114 pi->cmd = be16_to_cpu(h->command);
Andreas Gruenbacherb55d84b2011-03-22 13:17:47 +01001115 pi->size = be32_to_cpu(h->length);
1116 pi->vnr = 0;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001117 } else if (header_size == sizeof(struct p_header80) &&
1118 *(__be32 *)header == cpu_to_be32(DRBD_MAGIC)) {
1119 struct p_header80 *h = header;
1120 pi->cmd = be16_to_cpu(h->command);
1121 pi->size = be16_to_cpu(h->length);
Philipp Reisner77351055b2011-02-07 17:24:26 +01001122 pi->vnr = 0;
Philipp Reisner02918be2010-08-20 14:35:10 +02001123 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001124 drbd_err(connection, "Wrong magic value 0x%08x in protocol version %d\n",
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001125 be32_to_cpu(*(__be32 *)header),
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001126 connection->agreed_pro_version);
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001127 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001128 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001129 pi->data = header + header_size;
Andreas Gruenbacher8172f3e2011-03-16 17:22:39 +01001130 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001131}
1132
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001133static int drbd_recv_header(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner257d0af2011-01-26 12:15:29 +01001134{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001135 void *buffer = connection->data.rbuf;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001136 int err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001137
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001138 err = drbd_recv_all_warn(connection, buffer, drbd_header_size(connection));
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001139 if (err)
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001140 return err;
Philipp Reisner257d0af2011-01-26 12:15:29 +01001141
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001142 err = decode_header(connection, buffer, pi);
1143 connection->last_received = jiffies;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001144
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01001145 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001146}
1147
/* Flush the backing device of every attached volume of this connection,
 * if the resource's write ordering policy requires bdev flushes.  On any
 * flush failure the policy is downgraded to WO_drain_io and the loop is
 * abandoned. */
static void drbd_flush(struct drbd_connection *connection)
{
	int rv;
	struct drbd_peer_device *peer_device;
	int vnr;

	if (connection->resource->write_ordering >= WO_bdev_flush) {
		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;

			/* skip devices without an attached local disk */
			if (!get_ldev(device))
				continue;
			/* kref pins the device across the RCU unlock below,
			 * since the flush may sleep */
			kref_get(&device->kref);
			rcu_read_unlock();

			rv = blkdev_issue_flush(device->ldev->backing_bdev,
					GFP_NOIO, NULL);
			if (rv) {
				drbd_info(device, "local disk flush failed with status %d\n", rv);
				/* would rather check on EOPNOTSUPP, but that is not reliable.
				 * don't try again for ANY return value != 0
				 * if (rv == -EOPNOTSUPP) */
				drbd_bump_write_ordering(connection->resource, WO_drain_io);
			}
			put_ldev(device);
			kref_put(&device->kref, drbd_destroy_device);

			rcu_read_lock();
			if (rv)
				break;
		}
		rcu_read_unlock();
	}
}
1183
1184/**
1185 * drbd_may_finish_epoch() - Applies an epoch_event to the epoch's state, eventually finishes it.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001186 * @device: DRBD device.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001187 * @epoch: Epoch object.
1188 * @ev: Epoch event.
1189 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001190static enum finish_epoch drbd_may_finish_epoch(struct drbd_connection *connection,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001191 struct drbd_epoch *epoch,
1192 enum epoch_event ev)
1193{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001194 int epoch_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001195 struct drbd_epoch *next_epoch;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001196 enum finish_epoch rv = FE_STILL_LIVE;
1197
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001198 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001199 do {
1200 next_epoch = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001201
1202 epoch_size = atomic_read(&epoch->epoch_size);
1203
1204 switch (ev & ~EV_CLEANUP) {
1205 case EV_PUT:
1206 atomic_dec(&epoch->active);
1207 break;
1208 case EV_GOT_BARRIER_NR:
1209 set_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001210 break;
1211 case EV_BECAME_LAST:
1212 /* nothing to do*/
1213 break;
1214 }
1215
Philipp Reisnerb411b362009-09-25 16:07:19 -07001216 if (epoch_size != 0 &&
1217 atomic_read(&epoch->active) == 0 &&
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001218 (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags) || ev & EV_CLEANUP)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001219 if (!(ev & EV_CLEANUP)) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001220 spin_unlock(&connection->epoch_lock);
1221 drbd_send_b_ack(epoch->connection, epoch->barrier_nr, epoch_size);
1222 spin_lock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001223 }
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001224#if 0
1225 /* FIXME: dec unacked on connection, once we have
1226 * something to count pending connection packets in. */
Philipp Reisner80f9fd52011-07-18 15:45:15 +02001227 if (test_bit(DE_HAVE_BARRIER_NUMBER, &epoch->flags))
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001228 dec_unacked(epoch->connection);
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001229#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07001230
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001231 if (connection->current_epoch != epoch) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001232 next_epoch = list_entry(epoch->list.next, struct drbd_epoch, list);
1233 list_del(&epoch->list);
1234 ev = EV_BECAME_LAST | (ev & EV_CLEANUP);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001235 connection->epochs--;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001236 kfree(epoch);
1237
1238 if (rv == FE_STILL_LIVE)
1239 rv = FE_DESTROYED;
1240 } else {
1241 epoch->flags = 0;
1242 atomic_set(&epoch->epoch_size, 0);
Uwe Kleine-König698f9312010-07-02 20:41:51 +02001243 /* atomic_set(&epoch->active, 0); is already zero */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001244 if (rv == FE_STILL_LIVE)
1245 rv = FE_RECYCLED;
1246 }
1247 }
1248
1249 if (!next_epoch)
1250 break;
1251
1252 epoch = next_epoch;
1253 } while (1);
1254
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001255 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001256
Philipp Reisnerb411b362009-09-25 16:07:19 -07001257 return rv;
1258}
1259
1260/**
1261 * drbd_bump_write_ordering() - Fall back to an other write ordering method
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001262 * @connection: DRBD connection.
Philipp Reisnerb411b362009-09-25 16:07:19 -07001263 * @wo: Write ordering method to try.
1264 */
Philipp Reisnere9526582013-11-22 15:53:41 +01001265void drbd_bump_write_ordering(struct drbd_resource *resource, enum write_ordering_e wo)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001266{
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001267 struct disk_conf *dc;
Philipp Reisnere9526582013-11-22 15:53:41 +01001268 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001269 enum write_ordering_e pwo;
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001270 int vnr;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001271 static char *write_ordering_str[] = {
1272 [WO_none] = "none",
1273 [WO_drain_io] = "drain",
1274 [WO_bdev_flush] = "flush",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001275 };
1276
Philipp Reisnere9526582013-11-22 15:53:41 +01001277 pwo = resource->write_ordering;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001278 wo = min(pwo, wo);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001279 rcu_read_lock();
Philipp Reisnere9526582013-11-22 15:53:41 +01001280 idr_for_each_entry(&resource->devices, device, vnr) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001281 if (!get_ldev_if_state(device, D_ATTACHING))
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001282 continue;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001283 dc = rcu_dereference(device->ldev->disk_conf);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001284
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001285 if (wo == WO_bdev_flush && !dc->disk_flushes)
1286 wo = WO_drain_io;
1287 if (wo == WO_drain_io && !dc->disk_drain)
1288 wo = WO_none;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001289 put_ldev(device);
Philipp Reisner4b0007c2011-11-09 20:12:34 +01001290 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02001291 rcu_read_unlock();
Philipp Reisnere9526582013-11-22 15:53:41 +01001292 resource->write_ordering = wo;
1293 if (pwo != resource->write_ordering || wo == WO_bdev_flush)
1294 drbd_info(resource, "Method to ensure write ordering: %s\n", write_ordering_str[resource->write_ordering]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001295}
1296
1297/**
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001298 * drbd_submit_peer_request()
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001299 * @device: DRBD device.
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001300 * @peer_req: peer request
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001301 * @rw: flag field, see bio->bi_rw
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001302 *
1303 * May spread the pages to multiple bios,
1304 * depending on bio_add_page restrictions.
1305 *
1306 * Returns 0 if all bios have been submitted,
1307 * -ENOMEM if we could not allocate enough bios,
1308 * -ENOSPC (any better suggestion?) if we have not been able to bio_add_page a
1309 * single page to an empty bio (which should never happen and likely indicates
1310 * that the lower level IO stack is in some way broken). This has been observed
1311 * on certain Xen deployments.
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001312 */
1313/* TODO allocate from our own bio_set. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001314int drbd_submit_peer_request(struct drbd_device *device,
Andreas Gruenbacherfbe29de2011-02-17 16:38:35 +01001315 struct drbd_peer_request *peer_req,
1316 const unsigned rw, const int fault_type)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001317{
1318 struct bio *bios = NULL;
1319 struct bio *bio;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001320 struct page *page = peer_req->pages;
1321 sector_t sector = peer_req->i.sector;
1322 unsigned ds = peer_req->i.size;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001323 unsigned n_bios = 0;
1324 unsigned nr_pages = (ds + PAGE_SIZE -1) >> PAGE_SHIFT;
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001325 int err = -ENOMEM;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001326
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001327 if (peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) {
1328 /* wait for all pending IO completions, before we start
1329 * zeroing things out. */
1330 conn_wait_active_ee_empty(first_peer_device(device)->connection);
1331 if (blkdev_issue_zeroout(device->ldev->backing_bdev,
1332 sector, ds >> 9, GFP_NOIO))
1333 peer_req->flags |= EE_WAS_ERROR;
1334 drbd_endio_write_sec_final(peer_req);
1335 return 0;
1336 }
1337
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001338 /* Discards don't have any payload.
1339 * But the scsi layer still expects a bio_vec it can use internally,
1340 * see sd_setup_discard_cmnd() and blk_add_request_payload(). */
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001341 if (peer_req->flags & EE_IS_TRIM)
Lars Ellenberg54ed4ed2014-06-25 17:52:38 +02001342 nr_pages = 1;
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001343
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001344 /* In most cases, we will only need one bio. But in case the lower
1345 * level restrictions happen to be different at this offset on this
1346 * side than those of the sending peer, we may need to submit the
Lars Ellenberg9476f392011-02-23 17:02:01 +01001347 * request in more than one bio.
1348 *
1349 * Plain bio_alloc is good enough here, this is no DRBD internally
1350 * generated bio, but a bio allocated on behalf of the peer.
1351 */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001352next_bio:
1353 bio = bio_alloc(GFP_NOIO, nr_pages);
1354 if (!bio) {
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001355 drbd_err(device, "submit_ee: Allocation of a bio failed (nr_pages=%u)\n", nr_pages);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001356 goto fail;
1357 }
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001358 /* > peer_req->i.sector, unless this is the first bio */
Kent Overstreet4f024f32013-10-11 15:44:27 -07001359 bio->bi_iter.bi_sector = sector;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001360 bio->bi_bdev = device->ldev->backing_bdev;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001361 bio->bi_rw = rw;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001362 bio->bi_private = peer_req;
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01001363 bio->bi_end_io = drbd_peer_request_endio;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001364
1365 bio->bi_next = bios;
1366 bios = bio;
1367 ++n_bios;
1368
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001369 if (rw & REQ_DISCARD) {
1370 bio->bi_iter.bi_size = ds;
1371 goto submit;
1372 }
1373
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001374 page_chain_for_each(page) {
1375 unsigned len = min_t(unsigned, ds, PAGE_SIZE);
1376 if (!bio_add_page(bio, page, len, 0)) {
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001377 /* A single page must always be possible!
1378 * But in case it fails anyways,
1379 * we deal with it, and complain (below). */
1380 if (bio->bi_vcnt == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02001381 drbd_err(device,
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001382 "bio_add_page failed for len=%u, "
1383 "bi_vcnt=0 (bi_sector=%llu)\n",
Kent Overstreet4f024f32013-10-11 15:44:27 -07001384 len, (uint64_t)bio->bi_iter.bi_sector);
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001385 err = -ENOSPC;
1386 goto fail;
1387 }
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001388 goto next_bio;
1389 }
1390 ds -= len;
1391 sector += len >> 9;
1392 --nr_pages;
1393 }
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001394 D_ASSERT(device, ds == 0);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02001395submit:
1396 D_ASSERT(device, page == NULL);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001397
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001398 atomic_set(&peer_req->pending_bios, n_bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001399 do {
1400 bio = bios;
1401 bios = bios->bi_next;
1402 bio->bi_next = NULL;
1403
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001404 drbd_generic_make_request(device, fault_type, bio);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001405 } while (bios);
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001406 return 0;
1407
1408fail:
1409 while (bios) {
1410 bio = bios;
1411 bios = bios->bi_next;
1412 bio_put(bio);
1413 }
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01001414 return err;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02001415}
1416
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001417static void drbd_remove_epoch_entry_interval(struct drbd_device *device,
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001418 struct drbd_peer_request *peer_req)
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001419{
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001420 struct drbd_interval *i = &peer_req->i;
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001421
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001422 drbd_remove_interval(&device->write_requests, i);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001423 drbd_clear_interval(i);
1424
Andreas Gruenbacher6c852be2011-02-04 15:38:52 +01001425 /* Wake up any processes waiting for this peer request to complete. */
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001426 if (i->waiting)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001427 wake_up(&device->misc_wait);
Andreas Gruenbacher53840642011-01-28 10:31:04 +01001428}
1429
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001430static void conn_wait_active_ee_empty(struct drbd_connection *connection)
Philipp Reisner77fede52011-11-10 21:19:11 +01001431{
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001432 struct drbd_peer_device *peer_device;
Philipp Reisner77fede52011-11-10 21:19:11 +01001433 int vnr;
1434
1435 rcu_read_lock();
Andreas Gruenbacherc06ece62011-06-21 17:23:59 +02001436 idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
1437 struct drbd_device *device = peer_device->device;
1438
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001439 kref_get(&device->kref);
Philipp Reisner77fede52011-11-10 21:19:11 +01001440 rcu_read_unlock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001441 drbd_wait_ee_list_empty(device, &device->active_ee);
Andreas Gruenbacher05a10ec2011-06-07 22:54:17 +02001442 kref_put(&device->kref, drbd_destroy_device);
Philipp Reisner77fede52011-11-10 21:19:11 +01001443 rcu_read_lock();
1444 }
1445 rcu_read_unlock();
1446}
1447
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02001448static struct drbd_peer_device *
1449conn_peer_device(struct drbd_connection *connection, int volume_number)
1450{
1451 return idr_find(&connection->peer_devices, volume_number);
1452}
1453
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001454static int receive_Barrier(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001455{
Philipp Reisner2451fc32010-08-24 13:43:11 +02001456 int rv;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02001457 struct p_barrier *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001458 struct drbd_epoch *epoch;
1459
Lars Ellenberg9ed57dc2012-03-26 20:55:17 +02001460 /* FIXME these are unacked on connection,
1461 * not a specific (peer)device.
1462 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001463 connection->current_epoch->barrier_nr = p->barrier;
1464 connection->current_epoch->connection = connection;
1465 rv = drbd_may_finish_epoch(connection, connection->current_epoch, EV_GOT_BARRIER_NR);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001466
1467 /* P_BARRIER_ACK may imply that the corresponding extent is dropped from
1468 * the activity log, which means it would not be resynced in case the
1469 * R_PRIMARY crashes now.
1470 * Therefore we must send the barrier_ack after the barrier request was
1471 * completed. */
Philipp Reisnere9526582013-11-22 15:53:41 +01001472 switch (connection->resource->write_ordering) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07001473 case WO_none:
1474 if (rv == FE_RECYCLED)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001475 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001476
1477 /* receiver context, in the writeout path of the other node.
1478 * avoid potential distributed deadlock */
1479 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1480 if (epoch)
1481 break;
1482 else
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02001483 drbd_warn(connection, "Allocation of an epoch failed, slowing down\n");
Philipp Reisner2451fc32010-08-24 13:43:11 +02001484 /* Fall through */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001485
1486 case WO_bdev_flush:
1487 case WO_drain_io:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001488 conn_wait_active_ee_empty(connection);
1489 drbd_flush(connection);
Philipp Reisner2451fc32010-08-24 13:43:11 +02001490
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001491 if (atomic_read(&connection->current_epoch->epoch_size)) {
Philipp Reisner2451fc32010-08-24 13:43:11 +02001492 epoch = kmalloc(sizeof(struct drbd_epoch), GFP_NOIO);
1493 if (epoch)
1494 break;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001495 }
1496
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001497 return 0;
Philipp Reisner2451fc32010-08-24 13:43:11 +02001498 default:
Philipp Reisnere9526582013-11-22 15:53:41 +01001499 drbd_err(connection, "Strangeness in connection->write_ordering %d\n",
1500 connection->resource->write_ordering);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001501 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001502 }
1503
1504 epoch->flags = 0;
1505 atomic_set(&epoch->epoch_size, 0);
1506 atomic_set(&epoch->active, 0);
1507
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001508 spin_lock(&connection->epoch_lock);
1509 if (atomic_read(&connection->current_epoch->epoch_size)) {
1510 list_add(&epoch->list, &connection->current_epoch->list);
1511 connection->current_epoch = epoch;
1512 connection->epochs++;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001513 } else {
1514 /* The current_epoch got recycled while we allocated this one... */
1515 kfree(epoch);
1516 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02001517 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001518
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01001519 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001520}
1521
/* used from receive_RSDataReply (recv_resync_read)
 * and from receive_Data
 *
 * Allocate a peer request for @sector and receive its payload (and,
 * if configured, the peer's data digest) from the socket.  For P_TRIM
 * packets no payload is received; the size is taken from the trim
 * sub-header instead.
 *
 * Returns the peer request, or NULL on receive error, invalid size,
 * out-of-range request, allocation failure or digest mismatch. */
static struct drbd_peer_request *
read_in_block(struct drbd_peer_device *peer_device, u64 id, sector_t sector,
	      struct packet_info *pi) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	const sector_t capacity = drbd_get_capacity(device->this_bdev);
	struct drbd_peer_request *peer_req;
	struct page *page;
	int dgs, ds, err;
	int data_size = pi->size;
	void *dig_in = peer_device->connection->int_dig_in;
	void *dig_vv = peer_device->connection->int_dig_vv;
	unsigned long *data;
	struct p_trim *trim = (pi->cmd == P_TRIM) ? pi->data : NULL;

	/* the on-the-wire digest, if any, precedes the payload */
	dgs = 0;
	if (!trim && peer_device->connection->peer_integrity_tfm) {
		dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
		/*
		 * FIXME: Receive the incoming digest into the receive buffer
		 *	  here, together with its struct p_data?
		 */
		err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
		if (err)
			return NULL;
		data_size -= dgs;
	}

	if (trim) {
		D_ASSERT(peer_device, data_size == 0);
		data_size = be32_to_cpu(trim->size);
	}

	if (!expect(IS_ALIGNED(data_size, 512)))
		return NULL;
	/* prepare for larger trim requests. */
	if (!trim && !expect(data_size <= DRBD_MAX_BIO_SIZE))
		return NULL;

	/* even though we trust out peer,
	 * we sometimes have to double check. */
	if (sector + (data_size>>9) > capacity) {
		drbd_err(device, "request from peer beyond end of local disk: "
			"capacity: %llus < sector: %llus + size: %u\n",
			(unsigned long long)capacity,
			(unsigned long long)sector, data_size);
		return NULL;
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place.  */
	peer_req = drbd_alloc_peer_req(peer_device, id, sector, data_size, trim == NULL, GFP_NOIO);
	if (!peer_req)
		return NULL;

	/* trims carry no payload; we are done */
	if (trim)
		return peer_req;

	ds = data_size;
	page = peer_req->pages;
	page_chain_for_each(page) {
		unsigned len = min_t(int, ds, PAGE_SIZE);
		data = kmap(page);
		err = drbd_recv_all_warn(peer_device->connection, data, len);
		if (drbd_insert_fault(device, DRBD_FAULT_RECEIVE)) {
			drbd_err(device, "Fault injection: Corrupting data on receive\n");
			data[0] = data[0] ^ (unsigned long)-1;
		}
		kunmap(page);
		if (err) {
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
		ds -= len;
	}

	/* verify the received payload against the peer's digest */
	if (dgs) {
		drbd_csum_ee(peer_device->connection->peer_integrity_tfm, peer_req, dig_vv);
		if (memcmp(dig_in, dig_vv, dgs)) {
			drbd_err(device, "Digest integrity check FAILED: %llus +%u\n",
				(unsigned long long)sector, data_size);
			drbd_free_peer_req(device, peer_req);
			return NULL;
		}
	}
	device->recv_cnt += data_size>>9;
	return peer_req;
}
1613
1614/* drbd_drain_block() just takes a data block
1615 * out of the socket input buffer, and discards it.
1616 */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001617static int drbd_drain_block(struct drbd_peer_device *peer_device, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001618{
1619 struct page *page;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001620 int err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001621 void *data;
1622
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001623 if (!data_size)
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001624 return 0;
Lars Ellenbergc3470cd2010-04-01 16:57:19 +02001625
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001626 page = drbd_alloc_pages(peer_device, 1, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627
1628 data = kmap(page);
1629 while (data_size) {
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001630 unsigned int len = min_t(int, data_size, PAGE_SIZE);
1631
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001632 err = drbd_recv_all_warn(peer_device->connection, data, len);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001633 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001634 break;
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001635 data_size -= len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001636 }
1637 kunmap(page);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001638 drbd_free_pages(peer_device->device, page, 0);
Andreas Gruenbacherfc5be832011-03-16 17:50:50 +01001639 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001640}
1641
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001642static int recv_dless_read(struct drbd_peer_device *peer_device, struct drbd_request *req,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001643 sector_t sector, int data_size)
1644{
Kent Overstreet79886132013-11-23 17:19:00 -08001645 struct bio_vec bvec;
1646 struct bvec_iter iter;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001647 struct bio *bio;
Kent Overstreet79886132013-11-23 17:19:00 -08001648 int dgs, err, expect;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001649 void *dig_in = peer_device->connection->int_dig_in;
1650 void *dig_vv = peer_device->connection->int_dig_vv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001651
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001652 dgs = 0;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001653 if (peer_device->connection->peer_integrity_tfm) {
1654 dgs = crypto_hash_digestsize(peer_device->connection->peer_integrity_tfm);
1655 err = drbd_recv_all_warn(peer_device->connection, dig_in, dgs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001656 if (err)
1657 return err;
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02001658 data_size -= dgs;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001659 }
1660
Philipp Reisnerb411b362009-09-25 16:07:19 -07001661 /* optimistically update recv_cnt. if receiving fails below,
1662 * we disconnect anyways, and counters will be reset. */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001663 peer_device->device->recv_cnt += data_size>>9;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001664
1665 bio = req->master_bio;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001666 D_ASSERT(peer_device->device, sector == bio->bi_iter.bi_sector);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001667
Kent Overstreet79886132013-11-23 17:19:00 -08001668 bio_for_each_segment(bvec, bio, iter) {
1669 void *mapped = kmap(bvec.bv_page) + bvec.bv_offset;
1670 expect = min_t(int, data_size, bvec.bv_len);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001671 err = drbd_recv_all_warn(peer_device->connection, mapped, expect);
Kent Overstreet79886132013-11-23 17:19:00 -08001672 kunmap(bvec.bv_page);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01001673 if (err)
1674 return err;
1675 data_size -= expect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001676 }
1677
1678 if (dgs) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001679 drbd_csum_bio(peer_device->connection->peer_integrity_tfm, bio, dig_vv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001680 if (memcmp(dig_in, dig_vv, dgs)) {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001681 drbd_err(peer_device, "Digest integrity check FAILED. Broken NICs?\n");
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001682 return -EINVAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001683 }
1684 }
1685
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02001686 D_ASSERT(peer_device->device, data_size == 0);
Andreas Gruenbacher28284ce2011-03-16 17:54:02 +01001687 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001688}
1689
/*
 * e_end_resync_block() is called in asender context via
 * drbd_finish_peer_reqs().
 *
 * Completion work for a resync write received from the sync source:
 * on success, mark the range in sync and send P_RS_WRITE_ACK; on I/O
 * error, record the failed resync I/O and send P_NEG_ACK.  Either way,
 * drop the unacked count taken in recv_resync_read().
 *
 * Returns the result of drbd_send_ack() (0 on success, negative on
 * send failure).
 */
static int e_end_resync_block(struct drbd_work *w, int unused)
{
	struct drbd_peer_request *peer_req =
		container_of(w, struct drbd_peer_request, w);
	struct drbd_peer_device *peer_device = peer_req->peer_device;
	struct drbd_device *device = peer_device->device;
	sector_t sector = peer_req->i.sector;
	int err;

	/* Resync requests are never entered into the write_requests
	 * interval tree, so the interval must still be empty here. */
	D_ASSERT(device, drbd_interval_empty(&peer_req->i));

	if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
		drbd_set_in_sync(device, sector, peer_req->i.size);
		err = drbd_send_ack(peer_device, P_RS_WRITE_ACK, peer_req);
	} else {
		/* Record failure to sync */
		drbd_rs_failed_io(device, sector, peer_req->i.size);

		err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
	}
	dec_unacked(device);

	return err;
}
1718
/*
 * Receive one block of resync data and submit it to the local disk.
 *
 * Reads the payload from the socket via read_in_block(), queues the peer
 * request on device->sync_ee and submits the write.  On success the local
 * disk reference (taken by the caller via get_ldev()) is released later in
 * drbd_peer_request_endio; on any failure it is released here (hence the
 * __releases(local) annotation).
 *
 * Returns 0 if the write was submitted, -EIO otherwise.
 */
static int recv_resync_read(struct drbd_peer_device *peer_device, sector_t sector,
			    struct packet_info *pi) __releases(local)
{
	struct drbd_device *device = peer_device->device;
	struct drbd_peer_request *peer_req;

	peer_req = read_in_block(peer_device, ID_SYNCER, sector, pi);
	if (!peer_req)
		goto fail;

	/* This block answers one of our pending resync requests. */
	dec_rs_pending(device);

	inc_unacked(device);
	/* corresponding dec_unacked() in e_end_resync_block()
	 * respective _drbd_clear_done_ee */

	peer_req->w.cb = e_end_resync_block;

	spin_lock_irq(&device->resource->req_lock);
	list_add(&peer_req->w.list, &device->sync_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* account received sectors (in 512-byte units) as resync events */
	atomic_add(pi->size >> 9, &device->rs_sect_ev);
	if (drbd_submit_peer_request(device, peer_req, WRITE, DRBD_FAULT_RS_WR) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);

	drbd_free_peer_req(device, peer_req);
fail:
	put_ldev(device);
	return -EIO;
}
1756
/*
 * Map a block_id echoed back by the peer onto one of our pending requests.
 *
 * The id is the kernel address of the struct drbd_request we originally
 * sent out.  Before trusting that pointer, verify that the interval tree
 * @root actually contains this request at @sector (and that it still has a
 * local part); only then is the cast-back pointer known to be valid.
 *
 * @missing_ok suppresses the error log for callers where a missing request
 * is a legitimate outcome.  Returns the request, or NULL if not found.
 */
static struct drbd_request *
find_request(struct drbd_device *device, struct rb_root *root, u64 id,
	     sector_t sector, bool missing_ok, const char *func)
{
	struct drbd_request *req;

	/* Request object according to our peer */
	req = (struct drbd_request *)(unsigned long)id;
	if (drbd_contains_interval(root, sector, &req->i) && req->i.local)
		return req;
	if (!missing_ok) {
		drbd_err(device, "%s: failed to find request 0x%lx, sector %llus\n", func,
			(unsigned long)id, (unsigned long long)sector);
	}
	return NULL;
}
1773
/*
 * Handle P_DATA_REPLY: the peer served one of our reads (disk-less read).
 *
 * Look up the original request via the block_id the peer echoed back,
 * copy the payload into the request's bio via recv_dless_read(), and
 * advance the request state machine with DATA_RECEIVED.
 *
 * Returns 0 on success, -EIO on protocol/lookup errors, or the error
 * from recv_dless_read().
 */
static int receive_DataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct drbd_request *req;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, &device->read_requests, p->block_id, sector, false, __func__);
	spin_unlock_irq(&device->resource->req_lock);
	if (unlikely(!req))
		return -EIO;

	/* hlist_del(&req->collision) is done in _req_may_be_done, to avoid
	 * special casing it there for the various failure cases.
	 * still no race with drbd_fail_pending_reads */
	err = recv_dless_read(peer_device, req, sector, pi->size);
	if (!err)
		req_mod(req, DATA_RECEIVED);
	/* else: nothing. handled from drbd_disconnect...
	 * I don't think we may complete this just yet
	 * in case we are "on-disconnect: freeze" */

	return err;
}
1808
/*
 * Handle P_RS_DATA_REPLY: resync data sent by the sync source.
 *
 * If we can still reach the local disk, submit the block through
 * recv_resync_read(); otherwise drain the payload from the socket and
 * answer with a negative ack so the peer does not wait forever.
 */
static int receive_RSDataReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int err;
	struct p_data *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	/* resync replies never reference a specific request of ours */
	D_ASSERT(device, p->block_id == ID_SYNCER);

	if (get_ldev(device)) {
		/* data is submitted to disk within recv_resync_read.
		 * corresponding put_ldev done below on error,
		 * or in drbd_peer_request_endio. */
		err = recv_resync_read(peer_device, sector, pi);
	} else {
		if (__ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not write resync data to local disk.\n");

		err = drbd_drain_block(peer_device, pi->size);

		drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
	}

	/* account incoming resync sectors; presumably feeds the resync rate
	 * controller — confirm against the users of rs_sect_in */
	atomic_add(pi->size >> 9, &device->rs_sect_in);

	return err;
}
1843
/*
 * Re-queue local writes that were postponed because they conflicted with
 * a peer write that has now completed.
 *
 * Called with device->resource->req_lock held (from e_end_block).  Only
 * requests that are RQ_POSTPONED and no longer RQ_LOCAL_PENDING are
 * restarted; CONFLICT_RESOLVED moves them to the retry workqueue.
 */
static void restart_conflicting_writes(struct drbd_device *device,
				       sector_t sector, int size)
{
	struct drbd_interval *i;
	struct drbd_request *req;

	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (req->rq_state & RQ_LOCAL_PENDING ||
		    !(req->rq_state & RQ_POSTPONED))
			continue;
		/* as it is RQ_POSTPONED, this will cause it to
		 * be queued on the retry workqueue. */
		__req_mod(req, CONFLICT_RESOLVED, NULL);
	}
}
1862
Andreas Gruenbachera990be42011-04-06 17:56:48 +02001863/*
1864 * e_end_block() is called in asender context via drbd_finish_peer_reqs().
Philipp Reisnerb411b362009-09-25 16:07:19 -07001865 */
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001866static int e_end_block(struct drbd_work *w, int cancel)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001867{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001868 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001869 container_of(w, struct drbd_peer_request, w);
1870 struct drbd_peer_device *peer_device = peer_req->peer_device;
1871 struct drbd_device *device = peer_device->device;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001872 sector_t sector = peer_req->i.sector;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001873 int err = 0, pcmd;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001874
Philipp Reisner303d1442011-04-13 16:24:47 -07001875 if (peer_req->flags & EE_SEND_WRITE_ACK) {
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001876 if (likely((peer_req->flags & EE_WAS_ERROR) == 0)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001877 pcmd = (device->state.conn >= C_SYNC_SOURCE &&
1878 device->state.conn <= C_PAUSED_SYNC_T &&
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01001879 peer_req->flags & EE_MAY_SET_IN_SYNC) ?
Philipp Reisnerb411b362009-09-25 16:07:19 -07001880 P_RS_WRITE_ACK : P_WRITE_ACK;
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001881 err = drbd_send_ack(peer_device, pcmd, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001882 if (pcmd == P_RS_WRITE_ACK)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001883 drbd_set_in_sync(device, sector, peer_req->i.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001884 } else {
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001885 err = drbd_send_ack(peer_device, P_NEG_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001886 /* we expect it to be marked out of sync anyways...
1887 * maybe assert this? */
1888 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001889 dec_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001890 }
1891 /* we delete from the conflict detection hash _after_ we sent out the
1892 * P_WRITE_ACK / P_NEG_ACK, to get the sequence number right. */
Philipp Reisner302bdea2011-04-21 11:36:49 +02001893 if (peer_req->flags & EE_IN_INTERVAL_TREE) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001894 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001895 D_ASSERT(device, !drbd_interval_empty(&peer_req->i));
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001896 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001897 if (peer_req->flags & EE_RESTART_REQUESTS)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001898 restart_conflicting_writes(device, sector, peer_req->i.size);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001899 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherbb3bfe92011-01-21 15:59:23 +01001900 } else
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02001901 D_ASSERT(device, drbd_interval_empty(&peer_req->i));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001902
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02001903 drbd_may_finish_epoch(first_peer_device(device)->connection, peer_req->epoch, EV_PUT + (cancel ? EV_CLEANUP : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001904
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001905 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001906}
1907
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001908static int e_send_ack(struct drbd_work *w, enum drbd_packet ack)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001909{
Andreas Gruenbacher8050e6d2011-02-18 16:12:48 +01001910 struct drbd_peer_request *peer_req =
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001911 container_of(w, struct drbd_peer_request, w);
1912 struct drbd_peer_device *peer_device = peer_req->peer_device;
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001913 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001914
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001915 err = drbd_send_ack(peer_device, ack, peer_req);
1916 dec_unacked(peer_device->device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001917
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001918 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001919}
1920
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001921static int e_send_superseded(struct drbd_work *w, int unused)
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001922{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001923 return e_send_ack(w, P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001924}
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001925
Andreas Gruenbacher99920dc2011-03-16 15:31:39 +01001926static int e_send_retry_write(struct drbd_work *w, int unused)
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001927{
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001928 struct drbd_peer_request *peer_req =
1929 container_of(w, struct drbd_peer_request, w);
1930 struct drbd_connection *connection = peer_req->peer_device->connection;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001931
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001932 return e_send_ack(w, connection->agreed_pro_version >= 100 ?
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02001933 P_RETRY_WRITE : P_SUPERSEDED);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01001934}
1935
Andreas Gruenbacher3e394da2011-01-26 18:36:55 +01001936static bool seq_greater(u32 a, u32 b)
1937{
1938 /*
1939 * We assume 32-bit wrap-around here.
1940 * For 24-bit wrap-around, we would have to shift:
1941 * a <<= 8; b <<= 8;
1942 */
1943 return (s32)a - (s32)b > 0;
1944}
1945
1946static u32 seq_max(u32 a, u32 b)
1947{
1948 return seq_greater(a, b) ? a : b;
1949}
1950
/*
 * Track the highest sequence number seen from the peer.
 *
 * Only relevant when conflicts must be resolved (RESOLVE_CONFLICTS set).
 * Updates device->peer_seq under peer_seq_lock and wakes waiters in
 * wait_for_and_update_peer_seq() — but only when this packet actually
 * advanced the sequence number.
 */
static void update_peer_seq(struct drbd_peer_device *peer_device, unsigned int peer_seq)
{
	struct drbd_device *device = peer_device->device;
	unsigned int newest_peer_seq;

	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)) {
		spin_lock(&device->peer_seq_lock);
		newest_peer_seq = seq_max(device->peer_seq, peer_seq);
		device->peer_seq = newest_peer_seq;
		spin_unlock(&device->peer_seq_lock);
		/* wake up only if we actually changed device->peer_seq */
		if (peer_seq == newest_peer_seq)
			wake_up(&device->seq_wait);
	}
}
1966
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001967static inline int overlaps(sector_t s1, int l1, sector_t s2, int l2)
1968{
1969 return !((s1 + (l1>>9) <= s2) || (s1 >= s2 + (l2>>9)));
1970}
1971
1972/* maybe change sync_ee into interval trees as well? */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02001973static bool overlapping_resync_write(struct drbd_device *device, struct drbd_peer_request *peer_req)
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001974{
1975 struct drbd_peer_request *rs_req;
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001976 bool rv = 0;
1977
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001978 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02001979 list_for_each_entry(rs_req, &device->sync_ee, w.list) {
Lars Ellenbergd93f6302012-03-26 15:49:13 +02001980 if (overlaps(peer_req->i.sector, peer_req->i.size,
1981 rs_req->i.sector, rs_req->i.size)) {
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001982 rv = 1;
1983 break;
1984 }
1985 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02001986 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01001987
1988 return rv;
1989}
1990
Philipp Reisnerb411b362009-09-25 16:07:19 -07001991/* Called from receive_Data.
1992 * Synchronize packets on sock with packets on msock.
1993 *
1994 * This is here so even when a P_DATA packet traveling via sock overtook an Ack
1995 * packet traveling on msock, they are still processed in the order they have
1996 * been sent.
1997 *
1998 * Note: we don't care for Ack packets overtaking P_DATA packets.
1999 *
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002000 * In case packet_seq is larger than device->peer_seq number, there are
Philipp Reisnerb411b362009-09-25 16:07:19 -07002001 * outstanding packets on the msock. We wait for them to arrive.
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002002 * In case we are the logically next packet, we update device->peer_seq
Philipp Reisnerb411b362009-09-25 16:07:19 -07002003 * ourselves. Correctly handles 32bit wrap around.
2004 *
2005 * Assume we have a 10 GBit connection, that is about 1<<30 byte per second,
2006 * about 1<<21 sectors per second. So "worst" case, we have 1<<3 == 8 seconds
2007 * for the 24bit wrap (historical atomic_t guarantee on some archs), and we have
2008 * 1<<9 == 512 seconds aka ages for the 32bit wrap around...
2009 *
2010 * returns 0 if we may process the packet,
2011 * -ERESTARTSYS if we were interrupted (by disconnect signal). */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002012static int wait_for_and_update_peer_seq(struct drbd_peer_device *peer_device, const u32 peer_seq)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002013{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002014 struct drbd_device *device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002015 DEFINE_WAIT(wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002016 long timeout;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002017 int ret = 0, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002018
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002019 if (!test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags))
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002020 return 0;
2021
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002022 spin_lock(&device->peer_seq_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002023 for (;;) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002024 if (!seq_greater(peer_seq - 1, device->peer_seq)) {
2025 device->peer_seq = seq_max(device->peer_seq, peer_seq);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002026 break;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002027 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002028
Philipp Reisnerb411b362009-09-25 16:07:19 -07002029 if (signal_pending(current)) {
2030 ret = -ERESTARTSYS;
2031 break;
2032 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002033
2034 rcu_read_lock();
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02002035 tp = rcu_dereference(first_peer_device(device)->connection->net_conf)->two_primaries;
Philipp Reisnerb874d232013-10-23 10:59:16 +02002036 rcu_read_unlock();
2037
2038 if (!tp)
2039 break;
2040
2041 /* Only need to wait if two_primaries is enabled */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002042 prepare_to_wait(&device->seq_wait, &wait, TASK_INTERRUPTIBLE);
2043 spin_unlock(&device->peer_seq_lock);
Philipp Reisner44ed1672011-04-19 17:10:19 +02002044 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002045 timeout = rcu_dereference(peer_device->connection->net_conf)->ping_timeo*HZ/10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002046 rcu_read_unlock();
Andreas Gruenbacher71b1c1e2011-03-01 15:40:43 +01002047 timeout = schedule_timeout(timeout);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002048 spin_lock(&device->peer_seq_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002049 if (!timeout) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002050 ret = -ETIMEDOUT;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002051 drbd_err(device, "Timed out waiting for missing ack packets; disconnecting\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002052 break;
2053 }
2054 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002055 spin_unlock(&device->peer_seq_lock);
2056 finish_wait(&device->seq_wait, &wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002057 return ret;
2058}
2059
Lars Ellenberg688593c2010-11-17 22:25:03 +01002060/* see also bio_flags_to_wire()
2061 * DRBD_REQ_*, because we need to semantically map the flags to data packet
2062 * flags and back. We may replicate to other kernel versions. */
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002063static unsigned long wire_flags_to_bio(u32 dpf)
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002064{
Lars Ellenberg688593c2010-11-17 22:25:03 +01002065 return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) |
2066 (dpf & DP_FUA ? REQ_FUA : 0) |
2067 (dpf & DP_FLUSH ? REQ_FLUSH : 0) |
2068 (dpf & DP_DISCARD ? REQ_DISCARD : 0);
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002069}
2070
/*
 * Fail every RQ_POSTPONED local request overlapping [sector, sector+size).
 *
 * Entered with device->resource->req_lock held (it is dropped and
 * re-taken around complete_master_bio()).  Because the lock is dropped,
 * the interval tree may change, so the overlap scan restarts from the
 * top after every request handled.
 */
static void fail_postponed_requests(struct drbd_device *device, sector_t sector,
				    unsigned int size)
{
	struct drbd_interval *i;

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		struct drbd_request *req;
		struct bio_and_error m;

		if (!i->local)
			continue;
		req = container_of(i, struct drbd_request, i);
		if (!(req->rq_state & RQ_POSTPONED))
			continue;
		req->rq_state &= ~RQ_POSTPONED;
		__req_mod(req, NEG_ACKED, &m);
		spin_unlock_irq(&device->resource->req_lock);
		if (m.bio)
			complete_master_bio(device, &m);
		spin_lock_irq(&device->resource->req_lock);
		goto repeat;
	}
}
2095
/*
 * Insert a peer write into the write_requests interval tree and resolve
 * any conflicts with concurrent local (or earlier peer) requests.
 *
 * With RESOLVE_CONFLICTS set (we are the arbitrating node), a conflicting
 * peer write is either declared superseded (fully contained in an
 * overlapping request) or told to be retried; in both cases the request
 * is queued on done_ee for the asender and -ENOENT is returned so the
 * caller does not submit it.  Without RESOLVE_CONFLICTS, we wait for the
 * other node's verdict and/or for local conflicting requests to finish.
 *
 * Returns 0 if the peer request may be submitted, -ENOENT if it was
 * resolved without submission, or a drbd_wait_misc() error.  On error the
 * interval is removed from the tree again.
 */
static int handle_write_conflicts(struct drbd_device *device,
				  struct drbd_peer_request *peer_req)
{
	struct drbd_connection *connection = peer_req->peer_device->connection;
	bool resolve_conflicts = test_bit(RESOLVE_CONFLICTS, &connection->flags);
	sector_t sector = peer_req->i.sector;
	const unsigned int size = peer_req->i.size;
	struct drbd_interval *i;
	bool equal;
	int err;

	/*
	 * Inserting the peer request into the write_requests tree will prevent
	 * new conflicting local requests from being added.
	 */
	drbd_insert_interval(&device->write_requests, &peer_req->i);

    repeat:
	drbd_for_each_overlap(i, &device->write_requests, sector, size) {
		if (i == &peer_req->i)
			continue;

		if (!i->local) {
			/*
			 * Our peer has sent a conflicting remote request; this
			 * should not happen in a two-node setup.  Wait for the
			 * earlier peer request to complete.
			 */
			err = drbd_wait_misc(device, i);
			if (err)
				goto out;
			goto repeat;
		}

		equal = i->sector == sector && i->size == size;
		if (resolve_conflicts) {
			/*
			 * If the peer request is fully contained within the
			 * overlapping request, it can be considered overwritten
			 * and thus superseded; otherwise, it will be retried
			 * once all overlapping requests have completed.
			 */
			bool superseded = i->sector <= sector && i->sector +
				       (i->size >> 9) >= sector + (size >> 9);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u, "
					       "assuming %s came first\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size,
					  superseded ? "local" : "remote");

			/* corresponding dec_unacked() happens when the ack
			 * work (e_send_superseded / e_send_retry_write) runs */
			inc_unacked(device);
			peer_req->w.cb = superseded ? e_send_superseded :
						   e_send_retry_write;
			list_add_tail(&peer_req->w.list, &device->done_ee);
			wake_asender(connection);

			err = -ENOENT;
			goto out;
		} else {
			struct drbd_request *req =
				container_of(i, struct drbd_request, i);

			if (!equal)
				drbd_alert(device, "Concurrent writes detected: "
					       "local=%llus +%u, remote=%llus +%u\n",
					  (unsigned long long)i->sector, i->size,
					  (unsigned long long)sector, size);

			if (req->rq_state & RQ_LOCAL_PENDING ||
			    !(req->rq_state & RQ_POSTPONED)) {
				/*
				 * Wait for the node with the discard flag to
				 * decide if this request has been superseded
				 * or needs to be retried.
				 * Requests that have been superseded will
				 * disappear from the write_requests tree.
				 *
				 * In addition, wait for the conflicting
				 * request to finish locally before submitting
				 * the conflicting peer request.
				 */
				err = drbd_wait_misc(device, &req->i);
				if (err) {
					_conn_request_state(connection, NS(conn, C_TIMEOUT), CS_HARD);
					fail_postponed_requests(device, sector, size);
					goto out;
				}
				goto repeat;
			}
			/*
			 * Remember to restart the conflicting requests after
			 * the new peer request has completed.
			 */
			peer_req->flags |= EE_RESTART_REQUESTS;
		}
	}
	err = 0;

    out:
	if (err)
		drbd_remove_epoch_entry_interval(device, peer_req);
	return err;
}
2202
Philipp Reisnerb411b362009-09-25 16:07:19 -07002203/* mirrored write */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002204static int receive_Data(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002205{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002206 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002207 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002208 sector_t sector;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002209 struct drbd_peer_request *peer_req;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02002210 struct p_data *p = pi->data;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002211 u32 peer_seq = be32_to_cpu(p->seq_num);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002212 int rw = WRITE;
2213 u32 dp_flags;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002214 int err, tp;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002215
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002216 peer_device = conn_peer_device(connection, pi->vnr);
2217 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01002218 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002219 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002220
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002221 if (!get_ldev(device)) {
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002222 int err2;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002223
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002224 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
2225 drbd_send_ack_dp(peer_device, P_NEG_ACK, p, pi->size);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002226 atomic_inc(&connection->current_epoch->epoch_size);
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002227 err2 = drbd_drain_block(peer_device, pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002228 if (!err)
2229 err = err2;
2230 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002231 }
2232
Andreas Gruenbacherfcefa622011-02-17 16:46:59 +01002233 /*
2234 * Corresponding put_ldev done either below (on various errors), or in
2235 * drbd_peer_request_endio, if we successfully submit the data at the
2236 * end of this function.
2237 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002238
2239 sector = be64_to_cpu(p->sector);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002240 peer_req = read_in_block(peer_device, p->block_id, sector, pi);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002241 if (!peer_req) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002242 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002243 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002244 }
2245
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002246 peer_req->w.cb = e_end_block;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002247
Lars Ellenberg688593c2010-11-17 22:25:03 +01002248 dp_flags = be32_to_cpu(p->dp_flags);
Andreas Gruenbacher81f0ffd2011-08-30 16:22:33 +02002249 rw |= wire_flags_to_bio(dp_flags);
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002250 if (pi->cmd == P_TRIM) {
2251 struct request_queue *q = bdev_get_queue(device->ldev->backing_bdev);
2252 peer_req->flags |= EE_IS_TRIM;
2253 if (!blk_queue_discard(q))
2254 peer_req->flags |= EE_IS_TRIM_USE_ZEROOUT;
2255 D_ASSERT(peer_device, peer_req->i.size > 0);
2256 D_ASSERT(peer_device, rw & REQ_DISCARD);
2257 D_ASSERT(peer_device, peer_req->pages == NULL);
2258 } else if (peer_req->pages == NULL) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02002259 D_ASSERT(device, peer_req->i.size == 0);
2260 D_ASSERT(device, dp_flags & DP_FLUSH);
Lars Ellenberga73ff322012-06-25 19:15:38 +02002261 }
Lars Ellenberg688593c2010-11-17 22:25:03 +01002262
2263 if (dp_flags & DP_MAY_SET_IN_SYNC)
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002264 peer_req->flags |= EE_MAY_SET_IN_SYNC;
Lars Ellenberg688593c2010-11-17 22:25:03 +01002265
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002266 spin_lock(&connection->epoch_lock);
2267 peer_req->epoch = connection->current_epoch;
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002268 atomic_inc(&peer_req->epoch->epoch_size);
2269 atomic_inc(&peer_req->epoch->active);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002270 spin_unlock(&connection->epoch_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002271
Philipp Reisner302bdea2011-04-21 11:36:49 +02002272 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002273 tp = rcu_dereference(peer_device->connection->net_conf)->two_primaries;
Philipp Reisner302bdea2011-04-21 11:36:49 +02002274 rcu_read_unlock();
2275 if (tp) {
2276 peer_req->flags |= EE_IN_INTERVAL_TREE;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002277 err = wait_for_and_update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002278 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002279 goto out_interrupted;
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002280 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002281 err = handle_write_conflicts(device, peer_req);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002282 if (err) {
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002283 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002284 if (err == -ENOENT) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002285 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002286 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002287 }
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01002288 goto out_interrupted;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002289 }
Philipp Reisnerb874d232013-10-23 10:59:16 +02002290 } else {
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002291 update_peer_seq(peer_device, peer_seq);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002292 spin_lock_irq(&device->resource->req_lock);
Philipp Reisnerb874d232013-10-23 10:59:16 +02002293 }
Lars Ellenberga0fb3c42014-04-28 18:43:23 +02002294 /* if we use the zeroout fallback code, we process synchronously
2295 * and we wait for all pending requests, respectively wait for
2296 * active_ee to become empty in drbd_submit_peer_request();
2297 * better not add ourselves here. */
2298 if ((peer_req->flags & EE_IS_TRIM_USE_ZEROOUT) == 0)
2299 list_add(&peer_req->w.list, &device->active_ee);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002300 spin_unlock_irq(&device->resource->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002301
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002302 if (device->state.conn == C_SYNC_TARGET)
2303 wait_event(device->ee_wait, !overlapping_resync_write(device, peer_req));
Philipp Reisnerb6a370ba2012-02-19 01:27:53 +01002304
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002305 if (peer_device->connection->agreed_pro_version < 100) {
Philipp Reisner44ed1672011-04-19 17:10:19 +02002306 rcu_read_lock();
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02002307 switch (rcu_dereference(peer_device->connection->net_conf)->wire_protocol) {
Philipp Reisner303d1442011-04-13 16:24:47 -07002308 case DRBD_PROT_C:
2309 dp_flags |= DP_SEND_WRITE_ACK;
2310 break;
2311 case DRBD_PROT_B:
2312 dp_flags |= DP_SEND_RECEIVE_ACK;
2313 break;
2314 }
Philipp Reisner44ed1672011-04-19 17:10:19 +02002315 rcu_read_unlock();
Philipp Reisner303d1442011-04-13 16:24:47 -07002316 }
2317
2318 if (dp_flags & DP_SEND_WRITE_ACK) {
2319 peer_req->flags |= EE_SEND_WRITE_ACK;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002320 inc_unacked(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002321 /* corresponding dec_unacked() in e_end_block()
2322 * respective _drbd_clear_done_ee */
Philipp Reisner303d1442011-04-13 16:24:47 -07002323 }
2324
2325 if (dp_flags & DP_SEND_RECEIVE_ACK) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002326 /* I really don't like it that the receiver thread
2327 * sends on the msock, but anyways */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002328 drbd_send_ack(first_peer_device(device), P_RECV_ACK, peer_req);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002329 }
2330
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002331 if (device->state.pdsk < D_INCONSISTENT) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002332 /* In case we have the only disk of the cluster, */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002333 drbd_set_out_of_sync(device, peer_req->i.sector, peer_req->i.size);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002334 peer_req->flags |= EE_CALL_AL_COMPLETE_IO;
2335 peer_req->flags &= ~EE_MAY_SET_IN_SYNC;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002336 drbd_al_begin_io(device, &peer_req->i, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002337 }
2338
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002339 err = drbd_submit_peer_request(device, peer_req, rw, DRBD_FAULT_DT_WR);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002340 if (!err)
2341 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002342
Lars Ellenberg10f6d9922011-01-24 14:47:09 +01002343 /* don't care for the reason here */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002344 drbd_err(device, "submit failed, triggering re-connect\n");
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002345 spin_lock_irq(&device->resource->req_lock);
Andreas Gruenbachera8cd15b2011-08-25 15:49:40 +02002346 list_del(&peer_req->w.list);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002347 drbd_remove_epoch_entry_interval(device, peer_req);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02002348 spin_unlock_irq(&device->resource->req_lock);
Andreas Gruenbacherdb830c42011-02-04 15:57:48 +01002349 if (peer_req->flags & EE_CALL_AL_COMPLETE_IO)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002350 drbd_al_complete_io(device, &peer_req->i);
Lars Ellenberg22cc37a2010-09-14 20:40:41 +02002351
Philipp Reisnerb411b362009-09-25 16:07:19 -07002352out_interrupted:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02002353 drbd_may_finish_epoch(connection, peer_req->epoch, EV_PUT + EV_CLEANUP);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002354 put_ldev(device);
2355 drbd_free_peer_req(device, peer_req);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01002356 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002357}
2358
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02002359/* We may throttle resync, if the lower device seems to be busy,
2360 * and current sync rate is above c_min_rate.
2361 *
2362 * To decide whether or not the lower device is busy, we use a scheme similar
2363 * to MD RAID is_mddev_idle(): if the partition stats reveal "significant"
2364 * (more than 64 sectors) of activity we cannot account for with our own resync
2365 * activity, it obviously is "busy".
2366 *
2367 * The current sync rate used here uses only the most recent two step marks,
2368 * to have a short time average so we can react faster.
2369 */
Lars Ellenberge8299872014-04-28 18:43:19 +02002370bool drbd_rs_should_slow_down(struct drbd_device *device, sector_t sector)
2371{
2372 struct lc_element *tmp;
2373 bool throttle = true;
2374
2375 if (!drbd_rs_c_min_rate_throttle(device))
2376 return false;
2377
2378 spin_lock_irq(&device->al_lock);
2379 tmp = lc_find(device->resync, BM_SECT_TO_EXT(sector));
2380 if (tmp) {
2381 struct bm_extent *bm_ext = lc_entry(tmp, struct bm_extent, lce);
2382 if (test_bit(BME_PRIORITY, &bm_ext->flags))
2383 throttle = false;
2384 /* Do not slow down if app IO is already waiting for this extent */
2385 }
2386 spin_unlock_irq(&device->al_lock);
2387
2388 return throttle;
2389}
2390
/* Decide whether resync should be rate-limited based on c-min-rate.
 *
 * Returns false immediately when the feature is disabled (c-min-rate == 0).
 * Otherwise, whenever "significant" backing-device activity not attributable
 * to our own resync is observed (more than 64 sectors since the last sample,
 * or on the very first call), re-estimate the current resync rate from the
 * most recent sync mark and return true if it exceeds c-min-rate.
 *
 * NOTE(review): no locking around rs_last_events/rs_mark_* here; presumably
 * only called from the receiver context for this device — confirm.
 */
bool drbd_rs_c_min_rate_throttle(struct drbd_device *device)
{
	struct gendisk *disk = device->ldev->backing_bdev->bd_contains->bd_disk;
	unsigned long db, dt, dbdt;	/* delta bits, delta time (s), rate */
	unsigned int c_min_rate;
	int curr_events;

	rcu_read_lock();
	c_min_rate = rcu_dereference(device->ldev->disk_conf)->c_min_rate;
	rcu_read_unlock();

	/* feature disabled? */
	if (c_min_rate == 0)
		return false;

	/* Total sectors read+written on the backing device, minus what we
	 * generated ourselves for resync: the remainder is application IO. */
	curr_events = (int)part_stat_read(&disk->part0, sectors[0]) +
		      (int)part_stat_read(&disk->part0, sectors[1]) -
			atomic_read(&device->rs_sect_ev);
	if (!device->rs_last_events || curr_events - device->rs_last_events > 64) {
		unsigned long rs_left;
		int i;

		device->rs_last_events = curr_events;

		/* sync speed average over the last 2*DRBD_SYNC_MARK_STEP,
		 * approx. */
		i = (device->rs_last_mark + DRBD_SYNC_MARKS-1) % DRBD_SYNC_MARKS;

		/* During online verify, progress is tracked in ov_left
		 * rather than in the bitmap weight. */
		if (device->state.conn == C_VERIFY_S || device->state.conn == C_VERIFY_T)
			rs_left = device->ov_left;
		else
			rs_left = drbd_bm_total_weight(device) - device->rs_failed;

		dt = ((long)jiffies - (long)device->rs_mark_time[i]) / HZ;
		if (!dt)
			dt++;	/* avoid division by zero right after a mark */
		db = device->rs_mark_left[i] - rs_left;
		dbdt = Bit2KB(db/dt);

		if (dbdt > c_min_rate)
			return true;
	}
	return false;
}
2435
/**
 * receive_DataRequest() - handle a peer's read request
 * @connection:	the connection the packet arrived on
 * @pi:		packet info; pi->data points at the struct p_block_req header
 *
 * Serves P_DATA_REQUEST (application read from the peer), P_RS_DATA_REQUEST
 * (resync read), P_CSUM_RS_REQUEST / P_OV_REPLY (checksum-based resync /
 * online-verify, both carrying a digest payload), and P_OV_REQUEST
 * (online-verify read).  Validates sector/size, negatively acks and drains
 * the payload when no up-to-date local data is available, otherwise
 * allocates a peer request, wires up the matching completion callback and
 * submits a READ to the backing device.
 *
 * Return: 0 on success; a negative error code causes a reconnect.
 */
static int receive_DataRequest(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	sector_t capacity;
	struct drbd_peer_request *peer_req;
	struct digest_info *di = NULL;
	int size, verb;
	unsigned int fault_type;
	struct p_block_req *p =	pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;
	capacity = drbd_get_capacity(device->this_bdev);

	sector = be64_to_cpu(p->sector);
	size   = be32_to_cpu(p->blksize);

	/* Reject malformed requests: non-positive, not 512-byte aligned,
	 * or larger than the maximum bio size. */
	if (size <= 0 || !IS_ALIGNED(size, 512) || size > DRBD_MAX_BIO_SIZE) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}
	/* Reject reads beyond the end of the device. */
	if (sector + (size>>9) > capacity) {
		drbd_err(device, "%s:%d: sector: %llus, size: %u\n", __FILE__, __LINE__,
				(unsigned long long)sector, size);
		return -EINVAL;
	}

	if (!get_ldev_if_state(device, D_UP_TO_DATE)) {
		/* No up-to-date local data: send the appropriate negative
		 * ack per request type, then drain any payload. */
		verb = 1;
		switch (pi->cmd) {
		case P_DATA_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_DREPLY, p);
			break;
		case P_RS_DATA_REQUEST:
		case P_CSUM_RS_REQUEST:
		case P_OV_REQUEST:
			drbd_send_ack_rp(peer_device, P_NEG_RS_DREPLY , p);
			break;
		case P_OV_REPLY:
			verb = 0;
			dec_rs_pending(device);
			drbd_send_ack_ex(peer_device, P_OV_RESULT, sector, size, ID_IN_SYNC);
			break;
		default:
			BUG();
		}
		if (verb && __ratelimit(&drbd_ratelimit_state))
			drbd_err(device, "Can not satisfy peer's read request, "
			    "no local data.\n");

		/* drain possibly payload */
		return drbd_drain_block(peer_device, pi->size);
	}

	/* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD
	 * "criss-cross" setup, that might cause write-out on some other DRBD,
	 * which in turn might block on the other node at this very place. */
	peer_req = drbd_alloc_peer_req(peer_device, p->block_id, sector, size,
			true /* has real payload */, GFP_NOIO);
	if (!peer_req) {
		put_ldev(device);
		return -ENOMEM;
	}

	switch (pi->cmd) {
	case P_DATA_REQUEST:
		peer_req->w.cb = w_e_end_data_req;
		fault_type = DRBD_FAULT_DT_RD;
		/* application IO, don't drbd_rs_begin_io */
		goto submit;

	case P_RS_DATA_REQUEST:
		peer_req->w.cb = w_e_end_rsdata_req;
		fault_type = DRBD_FAULT_RS_RD;
		/* used in the sector offset progress display */
		device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		break;

	case P_OV_REPLY:
	case P_CSUM_RS_REQUEST:
		/* Both carry a digest payload of pi->size bytes that must be
		 * read off the socket before the block itself is processed. */
		fault_type = DRBD_FAULT_RS_RD;
		di = kmalloc(sizeof(*di) + pi->size, GFP_NOIO);
		if (!di)
			goto out_free_e;

		di->digest_size = pi->size;
		/* digest bytes live directly behind the digest_info header */
		di->digest = (((char *)di)+sizeof(struct digest_info));

		peer_req->digest = di;
		peer_req->flags |= EE_HAS_DIGEST;	/* freed with peer_req */

		if (drbd_recv_all(peer_device->connection, di->digest, pi->size))
			goto out_free_e;

		if (pi->cmd == P_CSUM_RS_REQUEST) {
			D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);
			peer_req->w.cb = w_e_end_csum_rs_req;
			/* used in the sector offset progress display */
			device->bm_resync_fo = BM_SECT_TO_BIT(sector);
		} else if (pi->cmd == P_OV_REPLY) {
			/* track progress, we may need to throttle */
			atomic_add(size >> 9, &device->rs_sect_in);
			peer_req->w.cb = w_e_end_ov_reply;
			dec_rs_pending(device);
			/* drbd_rs_begin_io done when we sent this request,
			 * but accounting still needs to be done. */
			goto submit_for_resync;
		}
		break;

	case P_OV_REQUEST:
		/* First verify request (pro_version >= 90) initializes the
		 * online-verify bookkeeping and progress marks. */
		if (device->ov_start_sector == ~(sector_t)0 &&
		    peer_device->connection->agreed_pro_version >= 90) {
			unsigned long now = jiffies;
			int i;
			device->ov_start_sector = sector;
			device->ov_position = sector;
			device->ov_left = drbd_bm_bits(device) - BM_SECT_TO_BIT(sector);
			device->rs_total = device->ov_left;
			for (i = 0; i < DRBD_SYNC_MARKS; i++) {
				device->rs_mark_left[i] = device->ov_left;
				device->rs_mark_time[i] = now;
			}
			drbd_info(device, "Online Verify start sector: %llu\n",
					(unsigned long long)sector);
		}
		peer_req->w.cb = w_e_end_ov_req;
		fault_type = DRBD_FAULT_RS_RD;
		break;

	default:
		BUG();
	}

	/* Throttle, drbd_rs_begin_io and submit should become asynchronous
	 * wrt the receiver, but it is not as straightforward as it may seem.
	 * Various places in the resync start and stop logic assume resync
	 * requests are processed in order, requeuing this on the worker thread
	 * introduces a bunch of new code for synchronization between threads.
	 *
	 * Unlimited throttling before drbd_rs_begin_io may stall the resync
	 * "forever", throttling after drbd_rs_begin_io will lock that extent
	 * for application writes for the same time. For now, just throttle
	 * here, where the rest of the code expects the receiver to sleep for
	 * a while, anyways.
	 */

	/* Throttle before drbd_rs_begin_io, as that locks out application IO;
	 * this defers syncer requests for some time, before letting at least
	 * on request through. The resync controller on the receiving side
	 * will adapt to the incoming rate accordingly.
	 *
	 * We cannot throttle here if remote is Primary/SyncTarget:
	 * we would also throttle its application reads.
	 * In that case, throttling is done on the SyncTarget only.
	 */
	if (device->state.peer != R_PRIMARY && drbd_rs_should_slow_down(device, sector))
		schedule_timeout_uninterruptible(HZ/10);
	if (drbd_rs_begin_io(device, sector))
		goto out_free_e;

submit_for_resync:
	/* account the resync sectors we are about to generate ourselves */
	atomic_add(size >> 9, &device->rs_sect_ev);

submit:
	/* corresponding dec_unacked() in the completion callbacks */
	inc_unacked(device);
	spin_lock_irq(&device->resource->req_lock);
	list_add_tail(&peer_req->w.list, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	if (drbd_submit_peer_request(device, peer_req, READ, fault_type) == 0)
		return 0;

	/* don't care for the reason here */
	drbd_err(device, "submit failed, triggering re-connect\n");
	spin_lock_irq(&device->resource->req_lock);
	list_del(&peer_req->w.list);
	spin_unlock_irq(&device->resource->req_lock);
	/* no drbd_rs_complete_io(), we are dropping the connection anyways */

out_free_e:
	put_ldev(device);
	drbd_free_peer_req(device, peer_req);
	return -EIO;
}
2626
/**
 * drbd_asb_recover_0p - Recover after split-brain with no remaining primaries
 *
 * Applies the configured after-sb-0pri policy.
 *
 * Return:    1 => discard the remote data,
 *           -1 => discard the local data,
 *         -100 => no automatic decision (split-brain remains unresolved).
 *
 * Note the deliberate case fallthroughs: discard-younger/older-primary fall
 * back to discard-zero-changes, which in turn falls back to
 * discard-least-changes, when they cannot decide on their own.
 */
static int drbd_asb_recover_0p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int self, peer, rv = -100;
	unsigned long ch_self, ch_peer;
	enum drbd_after_sb_p after_sb_0p;

	/* low bit of the bitmap UUID on each side; presumably encodes
	 * "was primary when the split happened" — confirm against the
	 * UUID handling code */
	self = device->ldev->md.uuid[UI_BITMAP] & 1;
	peer = device->p_uuid[UI_BITMAP] & 1;

	/* change counts ("ch") used by the least-changes strategies */
	ch_peer = device->p_uuid[UI_SIZE];
	ch_self = device->comm_bm_set;

	rcu_read_lock();
	after_sb_0p = rcu_dereference(peer_device->connection->net_conf)->after_sb_0p;
	rcu_read_unlock();
	switch (after_sb_0p) {
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_CALL_HELPER:
	case ASB_VIOLENTLY:
		/* these policies are only meaningful with remaining primaries */
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_DISCARD_YOUNGER_PRI:
		if (self == 0 && peer == 1) {
			rv = -1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv =  1;
			break;
		}
		/* Else fall through to one of the other strategies... */
	case ASB_DISCARD_OLDER_PRI:
		if (self == 0 && peer == 1) {
			rv = 1;
			break;
		}
		if (self == 1 && peer == 0) {
			rv = -1;
			break;
		}
		/* Else fall through to one of the other strategies... */
		drbd_warn(device, "Discard younger/older primary did not find a decision\n"
		     "Using discard-least-changes instead\n");
		/* fall through */
	case ASB_DISCARD_ZERO_CHG:
		if (ch_peer == 0 && ch_self == 0) {
			/* tie: arbitrate via the RESOLVE_CONFLICTS flag */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
			break;
		} else {
			if (ch_peer == 0) { rv =  1; break; }
			if (ch_self == 0) { rv = -1; break; }
		}
		/* only the explicit zero-changes policy stops here;
		 * the fallthrough chain continues to least-changes */
		if (after_sb_0p == ASB_DISCARD_ZERO_CHG)
			break;
	case ASB_DISCARD_LEAST_CHG:
		if	(ch_self < ch_peer)
			rv = -1;
		else if (ch_self > ch_peer)
			rv =  1;
		else /* ( ch_self == ch_peer ) */
			/* Well, then use something else. */
			rv = test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags)
				? -1 : 1;
		break;
	case ASB_DISCARD_LOCAL:
		rv = -1;
		break;
	case ASB_DISCARD_REMOTE:
		rv =  1;
	}

	return rv;
}
2707
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002708/**
2709 * drbd_asb_recover_1p - Recover after split-brain with one remaining primary
2710 */
2711static int drbd_asb_recover_1p(struct drbd_peer_device *peer_device) __must_hold(local)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002712{
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002713 struct drbd_device *device = peer_device->device;
Andreas Gruenbacher6184ea22010-12-09 14:23:27 +01002714 int hg, rv = -100;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002715 enum drbd_after_sb_p after_sb_1p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002716
Philipp Reisner44ed1672011-04-19 17:10:19 +02002717 rcu_read_lock();
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002718 after_sb_1p = rcu_dereference(peer_device->connection->net_conf)->after_sb_1p;
Philipp Reisner44ed1672011-04-19 17:10:19 +02002719 rcu_read_unlock();
2720 switch (after_sb_1p) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07002721 case ASB_DISCARD_YOUNGER_PRI:
2722 case ASB_DISCARD_OLDER_PRI:
2723 case ASB_DISCARD_LEAST_CHG:
2724 case ASB_DISCARD_LOCAL:
2725 case ASB_DISCARD_REMOTE:
Philipp Reisner44ed1672011-04-19 17:10:19 +02002726 case ASB_DISCARD_ZERO_CHG:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002727 drbd_err(device, "Configuration error.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002728 break;
2729 case ASB_DISCONNECT:
2730 break;
2731 case ASB_CONSENSUS:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002732 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002733 if (hg == -1 && device->state.role == R_SECONDARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002734 rv = hg;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002735 if (hg == 1 && device->state.role == R_PRIMARY)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002736 rv = hg;
2737 break;
2738 case ASB_VIOLENTLY:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002739 rv = drbd_asb_recover_0p(peer_device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002740 break;
2741 case ASB_DISCARD_SECONDARY:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002742 return device->state.role == R_PRIMARY ? 1 : -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002743 case ASB_CALL_HELPER:
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02002744 hg = drbd_asb_recover_0p(peer_device);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002745 if (hg == -1 && device->state.role == R_PRIMARY) {
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002746 enum drbd_state_rv rv2;
2747
Philipp Reisnerb411b362009-09-25 16:07:19 -07002748 /* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
2749 * we might be here in C_WF_REPORT_PARAMS which is transient.
2750 * we do not need to wait for the after state change work either. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002751 rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
Andreas Gruenbacherbb437942010-12-09 14:02:35 +01002752 if (rv2 != SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002753 drbd_khelper(device, "pri-lost-after-sb");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002754 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002755 drbd_warn(device, "Successfully gave up primary role.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 rv = hg;
2757 }
2758 } else
2759 rv = hg;
2760 }
2761
2762 return rv;
2763}
2764
/**
 * drbd_asb_recover_2p - Recover after split-brain with two remaining primaries
 * @peer_device: peer device; its connection's net_conf carries the
 *               configured after-sb-2p policy.
 *
 * Applies the configured after_sb_2p policy and returns a sync-handshake
 * verdict in the same encoding as drbd_uuid_compare() (positive: we become
 * sync source, negative: sync target).  Returns -100 when the split brain
 * remains unresolved (policy is disconnect, demotion failed, or the policy
 * is not valid for the two-primaries case).
 */
static int drbd_asb_recover_2p(struct drbd_peer_device *peer_device) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	int hg, rv = -100;	/* default: unresolved split brain */
	enum drbd_after_sb_p after_sb_2p;

	/* net_conf may be replaced concurrently; snapshot the policy under RCU. */
	rcu_read_lock();
	after_sb_2p = rcu_dereference(peer_device->connection->net_conf)->after_sb_2p;
	rcu_read_unlock();
	switch (after_sb_2p) {
	/* These policies do not apply when both nodes are primary; the
	 * configuration layer should have rejected them for after-sb-2pri. */
	case ASB_DISCARD_YOUNGER_PRI:
	case ASB_DISCARD_OLDER_PRI:
	case ASB_DISCARD_LEAST_CHG:
	case ASB_DISCARD_LOCAL:
	case ASB_DISCARD_REMOTE:
	case ASB_CONSENSUS:
	case ASB_DISCARD_SECONDARY:
	case ASB_DISCARD_ZERO_CHG:
		drbd_err(device, "Configuration error.\n");
		break;
	case ASB_VIOLENTLY:
		/* Take the 0-primaries algorithm's verdict unconditionally. */
		rv = drbd_asb_recover_0p(peer_device);
		break;
	case ASB_DISCONNECT:
		break;
	case ASB_CALL_HELPER:
		hg = drbd_asb_recover_0p(peer_device);
		/* hg == -1: the 0p algorithm picked us as the loser; try to
		 * give up our primary role so the resync can proceed. */
		if (hg == -1) {
			enum drbd_state_rv rv2;

			/* drbd_change_state() does not sleep while in SS_IN_TRANSIENT_STATE,
			 * we might be here in C_WF_REPORT_PARAMS which is transient.
			 * we do not need to wait for the after state change work either. */
			rv2 = drbd_change_state(device, CS_VERBOSE, NS(role, R_SECONDARY));
			if (rv2 != SS_SUCCESS) {
				/* Demotion failed: hand the conflict to the
				 * user-space helper; rv stays -100. */
				drbd_khelper(device, "pri-lost-after-sb");
			} else {
				drbd_warn(device, "Successfully gave up primary role.\n");
				rv = hg;
			}
		} else
			rv = hg;
	}

	return rv;
}
2814
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02002815static void drbd_uuid_dump(struct drbd_device *device, char *text, u64 *uuid,
Philipp Reisnerb411b362009-09-25 16:07:19 -07002816 u64 bits, u64 flags)
2817{
2818 if (!uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002819 drbd_info(device, "%s uuid info vanished while I was looking!\n", text);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820 return;
2821 }
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02002822 drbd_info(device, "%s %016llX:%016llX:%016llX:%016llX bits:%llu flags:%llX\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823 text,
2824 (unsigned long long)uuid[UI_CURRENT],
2825 (unsigned long long)uuid[UI_BITMAP],
2826 (unsigned long long)uuid[UI_HISTORY_START],
2827 (unsigned long long)uuid[UI_HISTORY_END],
2828 (unsigned long long)bits,
2829 (unsigned long long)flags);
2830}
2831
/*
 * Return values of drbd_uuid_compare():
 *   100	after split brain, try auto recover
 *     2	C_SYNC_SOURCE set BitMap
 *     1	C_SYNC_SOURCE use BitMap
 *     0	no Sync
 *    -1	C_SYNC_TARGET use BitMap
 *    -2	C_SYNC_TARGET set BitMap
 *  -100	after split brain, disconnect
 * -1000	unrelated data
 * -1091	requires proto 91
 * -1096	requires proto 96
 */
/**
 * drbd_uuid_compare() - Decide sync direction from our and the peer's UUID sets
 * @device:  local device; ->ldev->md.uuid holds our on-disk UUIDs,
 *           ->p_uuid the set most recently received from the peer.
 * @rule_nr: out parameter, set to the number of the rule that decided the
 *           result so the caller can log it.
 *
 * Works through a fixed sequence of numbered rules comparing current,
 * bitmap and history UUIDs (always with the low "primary" bit masked off).
 * Returns the verdict encoding documented in the legend comment right above
 * this function.  May correct UUID state in place for the "lost the last
 * P_SYNC_UUID packet" cases (rules 34/35 and 51/71).
 * Must be called with the local disk reference held (__must_hold(local)).
 */
static int drbd_uuid_compare(struct drbd_device *device, int *rule_nr) __must_hold(local)
{
	u64 self, peer;
	int i, j;

	/* Low bit of a UUID flags "was primary"; mask it off everywhere. */
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);

	/* Rule 10: both sides freshly created -> nothing to sync. */
	*rule_nr = 10;
	if (self == UUID_JUST_CREATED && peer == UUID_JUST_CREATED)
		return 0;

	/* Rule 20: only we are fresh/blank -> full sync target. */
	*rule_nr = 20;
	if ((self == UUID_JUST_CREATED || self == (u64)0) &&
	     peer != UUID_JUST_CREATED)
		return -2;

	/* Rule 30: only the peer is fresh/blank -> full sync source. */
	*rule_nr = 30;
	if (self != UUID_JUST_CREATED &&
	    (peer == UUID_JUST_CREATED || peer == (u64)0))
		return 2;

	if (self == peer) {
		int rct, dc; /* roles at crash time */

		/* Peer has no bitmap UUID but we still do: we were sync
		 * source and missed the "resync finished" event. */
		if (device->p_uuid[UI_BITMAP] == (u64)0 && device->ldev->md.uuid[UI_BITMAP] != (u64)0) {

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			/* If our bitmap/history UUIDs line up with the peer's
			 * history, roll our own UUIDs forward to match. */
			if ((device->ldev->md.uuid[UI_BITMAP] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1))) {
				drbd_info(device, "was SyncSource, missed the resync finished event, corrected myself:\n");
				drbd_uuid_move_history(device);
				device->ldev->md.uuid[UI_HISTORY_START] = device->ldev->md.uuid[UI_BITMAP];
				device->ldev->md.uuid[UI_BITMAP] = 0;

				drbd_uuid_dump(device, "self", device->ldev->md.uuid,
					       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);
				*rule_nr = 34;
			} else {
				drbd_info(device, "was SyncSource (peer failed to write sync_uuid)\n");
				*rule_nr = 36;
			}

			return 1;
		}

		/* Mirror image of the case above: we cleared our bitmap UUID,
		 * the peer (then sync target) missed the finished event. */
		if (device->ldev->md.uuid[UI_BITMAP] == (u64)0 && device->p_uuid[UI_BITMAP] != (u64)0) {

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			if ((device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) == (device->p_uuid[UI_BITMAP] & ~((u64)1)) &&
			    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) == (device->p_uuid[UI_HISTORY_START] & ~((u64)1))) {
				drbd_info(device, "was SyncTarget, peer missed the resync finished event, corrected peer:\n");

				/* Correct our cached copy of the peer's UUIDs. */
				device->p_uuid[UI_HISTORY_START + 1] = device->p_uuid[UI_HISTORY_START];
				device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_BITMAP];
				device->p_uuid[UI_BITMAP] = 0UL;

				drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);
				*rule_nr = 35;
			} else {
				drbd_info(device, "was SyncTarget (failed to write sync_uuid)\n");
				*rule_nr = 37;
			}

			return -1;
		}

		/* Common power [off|failure] */
		rct = (test_bit(CRASHED_PRIMARY, &device->flags) ? 1 : 0) +
			(device->p_uuid[UI_FLAGS] & 2);
		/* lowest bit is set when we were primary,
		 * next bit (weight 2) is set when peer was primary */
		*rule_nr = 40;

		switch (rct) {
		case 0: /* !self_pri && !peer_pri */ return 0;
		case 1: /*  self_pri && !peer_pri */ return 1;
		case 2: /* !self_pri &&  peer_pri */ return -1;
		case 3: /*  self_pri &&  peer_pri */
			/* Both were primary at crash time: break the tie via
			 * the connection's RESOLVE_CONFLICTS flag. */
			dc = test_bit(RESOLVE_CONFLICTS, &first_peer_device(device)->connection->flags);
			return dc ? -1 : 1;
		}
	}

	/* Rule 50: our current UUID equals the peer's bitmap UUID -> the peer
	 * is ahead of us; become sync target. */
	*rule_nr = 50;
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer)
		return -1;

	/* Rule 51: our current UUID shows up in the peer's history -> the
	 * peer's last P_SYNC_UUID may have been lost; check and undo. */
	*rule_nr = 51;
	peer = device->p_uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START + 1] & ~((u64)1)) :
		    peer + UUID_NEW_BM_OFFSET == (device->p_uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of the peer's UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			device->p_uuid[UI_BITMAP] = device->p_uuid[UI_HISTORY_START];
			device->p_uuid[UI_HISTORY_START] = device->p_uuid[UI_HISTORY_START + 1];

			drbd_info(device, "Lost last syncUUID packet, corrected:\n");
			drbd_uuid_dump(device, "peer", device->p_uuid, device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

			return -1;
		}
	}

	/* Rule 60: our current UUID appears in the peer's history -> we are
	 * behind by more than one step; full sync target. */
	*rule_nr = 60;
	self = device->ldev->md.uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		peer = device->p_uuid[i] & ~((u64)1);
		if (self == peer)
			return -2;
	}

	/* Rule 70: mirror of rule 50 — our bitmap UUID equals the peer's
	 * current UUID; become sync source. */
	*rule_nr = 70;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	if (self == peer)
		return 1;

	/* Rule 71: mirror of rule 51, this time our own lost P_SYNC_UUID. */
	*rule_nr = 71;
	self = device->ldev->md.uuid[UI_HISTORY_START] & ~((u64)1);
	if (self == peer) {
		if (first_peer_device(device)->connection->agreed_pro_version < 96 ?
		    (device->ldev->md.uuid[UI_HISTORY_START + 1] & ~((u64)1)) ==
		    (device->p_uuid[UI_HISTORY_START] & ~((u64)1)) :
		    self + UUID_NEW_BM_OFFSET == (device->ldev->md.uuid[UI_BITMAP] & ~((u64)1))) {
			/* The last P_SYNC_UUID did not get though. Undo the last start of
			   resync as sync source modifications of our UUIDs. */

			if (first_peer_device(device)->connection->agreed_pro_version < 91)
				return -1091;

			__drbd_uuid_set(device, UI_BITMAP, device->ldev->md.uuid[UI_HISTORY_START]);
			__drbd_uuid_set(device, UI_HISTORY_START, device->ldev->md.uuid[UI_HISTORY_START + 1]);

			drbd_info(device, "Last syncUUID did not get through, corrected:\n");
			drbd_uuid_dump(device, "self", device->ldev->md.uuid,
				       device->state.disk >= D_NEGOTIATING ? drbd_bm_total_weight(device) : 0, 0);

			return 1;
		}
	}


	/* Rule 80: mirror of rule 60 — the peer's current UUID appears in
	 * our history; full sync source. */
	*rule_nr = 80;
	peer = device->p_uuid[UI_CURRENT] & ~((u64)1);
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		if (self == peer)
			return 2;
	}

	/* Rule 90: matching non-zero bitmap UUIDs on both sides -> split
	 * brain; let the caller attempt auto recovery. */
	*rule_nr = 90;
	self = device->ldev->md.uuid[UI_BITMAP] & ~((u64)1);
	peer = device->p_uuid[UI_BITMAP] & ~((u64)1);
	if (self == peer && self != ((u64)0))
		return 100;

	/* Rule 100: any overlap between the two history sets -> split brain,
	 * but too far diverged to auto recover; disconnect. */
	*rule_nr = 100;
	for (i = UI_HISTORY_START; i <= UI_HISTORY_END; i++) {
		self = device->ldev->md.uuid[i] & ~((u64)1);
		for (j = UI_HISTORY_START; j <= UI_HISTORY_END; j++) {
			peer = device->p_uuid[j] & ~((u64)1);
			if (self == peer)
				return -100;
		}
	}

	/* No relation between the two UUID sets at all. */
	return -1000;
}
3025
/* drbd_sync_handshake() returns the new conn state on success, or
   CONN_MASK (-1) on failure.

   Compares our UUIDs with the peer's (drbd_uuid_compare()), optionally runs
   the configured automatic split-brain recovery, and maps the resulting
   verdict "hg" (>0: we become sync source, <0: sync target, |hg| >= 2: full
   sync, +-100: split brain, <= -1000: incompatibility) to a connection state.
 */
static enum drbd_conns drbd_sync_handshake(struct drbd_peer_device *peer_device,
					   enum drbd_role peer_role,
					   enum drbd_disk_state peer_disk) __must_hold(local)
{
	struct drbd_device *device = peer_device->device;
	enum drbd_conns rv = C_MASK;
	enum drbd_disk_state mydisk;
	struct net_conf *nc;
	int hg, rule_nr, rr_conflict, tentative;

	/* While attaching (D_NEGOTIATING), judge by the disk state we are
	 * about to enter, not the transient one. */
	mydisk = device->state.disk;
	if (mydisk == D_NEGOTIATING)
		mydisk = device->new_state_tmp.disk;

	drbd_info(device, "drbd_sync_handshake:\n");

	/* uuid_lock keeps the UUID sets stable across dump + compare. */
	spin_lock_irq(&device->ldev->md.uuid_lock);
	drbd_uuid_dump(device, "self", device->ldev->md.uuid, device->comm_bm_set, 0);
	drbd_uuid_dump(device, "peer", device->p_uuid,
		       device->p_uuid[UI_SIZE], device->p_uuid[UI_FLAGS]);

	hg = drbd_uuid_compare(device, &rule_nr);
	spin_unlock_irq(&device->ldev->md.uuid_lock);

	drbd_info(device, "uuid_compare()=%d by rule %d\n", hg, rule_nr);

	if (hg == -1000) {
		drbd_alert(device, "Unrelated data, aborting!\n");
		return C_MASK;
	}
	/* -1091/-1096 style results: peer protocol too old for recovery. */
	if (hg < -1000) {
		drbd_alert(device, "To resolve this both sides have to support at least protocol %d\n", -hg - 1000);
		return C_MASK;
	}

	/* Exactly one side has consistent data: that side must be the
	 * source, overriding the UUID verdict; keep "full sync" strength
	 * if the UUID result demanded it or was an unresolved split brain. */
	if ((mydisk == D_INCONSISTENT && peer_disk > D_INCONSISTENT) ||
	    (peer_disk == D_INCONSISTENT && mydisk > D_INCONSISTENT)) {
		int f = (hg == -100) || abs(hg) == 2;
		hg = mydisk > D_INCONSISTENT ? 1 : -1;
		if (f)
			hg = hg*2;
		drbd_info(device, "Becoming sync %s due to disk states.\n",
		     hg > 0 ? "source" : "target");
	}

	if (abs(hg) == 100)
		drbd_khelper(device, "initial-split-brain");

	/* net_conf is RCU-protected; hold the read lock while we consult it. */
	rcu_read_lock();
	nc = rcu_dereference(peer_device->connection->net_conf);

	/* Automatic split-brain recovery: pick the after-sb-*p algorithm by
	 * the number of primaries involved. */
	if (hg == 100 || (hg == -100 && nc->always_asbp)) {
		int pcount = (device->state.role == R_PRIMARY)
			   + (peer_role == R_PRIMARY);
		int forced = (hg == -100);

		switch (pcount) {
		case 0:
			hg = drbd_asb_recover_0p(peer_device);
			break;
		case 1:
			hg = drbd_asb_recover_1p(peer_device);
			break;
		case 2:
			hg = drbd_asb_recover_2p(peer_device);
			break;
		}
		if (abs(hg) < 100) {
			drbd_warn(device, "Split-Brain detected, %d primaries, "
			     "automatically solved. Sync from %s node\n",
			     pcount, (hg < 0) ? "peer" : "this");
			if (forced) {
				drbd_warn(device, "Doing a full sync, since"
				     " UUIDs where ambiguous.\n");
				hg = hg*2;
			}
		}
	}

	/* Manual resolution: the side configured with --discard-my-data
	 * (DISCARD_MY_DATA flag here, UI_FLAGS bit 0 for the peer) loses. */
	if (hg == -100) {
		if (test_bit(DISCARD_MY_DATA, &device->flags) && !(device->p_uuid[UI_FLAGS]&1))
			hg = -1;
		if (!test_bit(DISCARD_MY_DATA, &device->flags) && (device->p_uuid[UI_FLAGS]&1))
			hg = 1;

		if (abs(hg) < 100)
			drbd_warn(device, "Split-Brain detected, manually solved. "
			     "Sync from %s node\n",
			     (hg < 0) ? "peer" : "this");
	}
	/* Copy out what we still need before leaving the RCU section. */
	rr_conflict = nc->rr_conflict;
	tentative = nc->tentative;
	rcu_read_unlock();

	if (hg == -100) {
		/* FIXME this log message is not correct if we end up here
		 * after an attempted attach on a diskless node.
		 * We just refuse to attach -- well, we drop the "connection"
		 * to that disk, in a way... */
		drbd_alert(device, "Split-Brain detected but unresolved, dropping connection!\n");
		drbd_khelper(device, "split-brain");
		return C_MASK;
	}

	if (hg > 0 && mydisk <= D_INCONSISTENT) {
		drbd_err(device, "I shall become SyncSource, but I am inconsistent!\n");
		return C_MASK;
	}

	/* We lost the handshake while primary with good data: apply the
	 * configured rr-conflict policy. */
	if (hg < 0 && /* by intention we do not use mydisk here. */
	    device->state.role == R_PRIMARY && device->state.disk >= D_CONSISTENT) {
		switch (rr_conflict) {
		case ASB_CALL_HELPER:
			drbd_khelper(device, "pri-lost");
			/* fall through */
		case ASB_DISCONNECT:
			drbd_err(device, "I shall become SyncTarget, but I am primary!\n");
			return C_MASK;
		case ASB_VIOLENTLY:
			drbd_warn(device, "Becoming SyncTarget, violating the stable-data"
			     "assumption\n");
		}
	}

	/* Dry-run connect: report the would-be outcome, then bail out. */
	if (tentative || test_bit(CONN_DRY_RUN, &peer_device->connection->flags)) {
		if (hg == 0)
			drbd_info(device, "dry-run connect: No resync, would become Connected immediately.\n");
		else
			drbd_info(device, "dry-run connect: Would become %s, doing a %s resync.",
				 drbd_conn_str(hg > 0 ? C_SYNC_SOURCE : C_SYNC_TARGET),
				 abs(hg) >= 2 ? "full" : "bit-map based");
		return C_MASK;
	}

	/* |hg| >= 2 means full sync: mark the whole bitmap out of sync. */
	if (abs(hg) >= 2) {
		drbd_info(device, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
		if (drbd_bitmap_io(device, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
					BM_LOCKED_SET_ALLOWED))
			return C_MASK;
	}

	if (hg > 0) { /* become sync source. */
		rv = C_WF_BITMAP_S;
	} else if (hg < 0) { /* become sync target */
		rv = C_WF_BITMAP_T;
	} else {
		rv = C_CONNECTED;
		if (drbd_bm_total_weight(device)) {
			drbd_info(device, "No resync, but %lu bits in bitmap!\n",
				  drbd_bm_total_weight(device));
		}
	}

	return rv;
}
3184
Philipp Reisnerf179d762011-05-16 17:31:47 +02003185static enum drbd_after_sb_p convert_after_sb(enum drbd_after_sb_p peer)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003186{
3187 /* ASB_DISCARD_REMOTE - ASB_DISCARD_LOCAL is valid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003188 if (peer == ASB_DISCARD_REMOTE)
3189 return ASB_DISCARD_LOCAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003190
3191 /* any other things with ASB_DISCARD_REMOTE or ASB_DISCARD_LOCAL are invalid */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003192 if (peer == ASB_DISCARD_LOCAL)
3193 return ASB_DISCARD_REMOTE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003194
3195 /* everything else is valid if they are equal on both sides. */
Philipp Reisnerf179d762011-05-16 17:31:47 +02003196 return peer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003197}
3198
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003199static int receive_protocol(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003200{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003201 struct p_protocol *p = pi->data;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003202 enum drbd_after_sb_p p_after_sb_0p, p_after_sb_1p, p_after_sb_2p;
3203 int p_proto, p_discard_my_data, p_two_primaries, cf;
3204 struct net_conf *nc, *old_net_conf, *new_net_conf = NULL;
3205 char integrity_alg[SHARED_SECRET_MAX] = "";
Andreas Gruenbacheraccdbcc2011-07-15 17:41:09 +02003206 struct crypto_hash *peer_integrity_tfm = NULL;
Philipp Reisner7aca6c72011-05-17 10:12:56 +02003207 void *int_dig_in = NULL, *int_dig_vv = NULL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003208
Philipp Reisnerb411b362009-09-25 16:07:19 -07003209 p_proto = be32_to_cpu(p->protocol);
3210 p_after_sb_0p = be32_to_cpu(p->after_sb_0p);
3211 p_after_sb_1p = be32_to_cpu(p->after_sb_1p);
3212 p_after_sb_2p = be32_to_cpu(p->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003213 p_two_primaries = be32_to_cpu(p->two_primaries);
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003214 cf = be32_to_cpu(p->conn_flags);
Andreas Gruenbacher6139f602011-05-06 20:00:02 +02003215 p_discard_my_data = cf & CF_DISCARD_MY_DATA;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003216
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003217 if (connection->agreed_pro_version >= 87) {
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003218 int err;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01003219
Andreas Gruenbacher88104ca2011-04-28 21:47:21 +02003220 if (pi->size > sizeof(integrity_alg))
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003221 return -EIO;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003222 err = drbd_recv_all(connection, integrity_alg, pi->size);
Andreas Gruenbacher86db0612011-04-28 15:24:18 +02003223 if (err)
3224 return err;
Philipp Reisner036b17e2011-05-16 17:38:11 +02003225 integrity_alg[SHARED_SECRET_MAX - 1] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003226 }
3227
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003228 if (pi->cmd != P_PROTOCOL_UPDATE) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003229 clear_bit(CONN_DRY_RUN, &connection->flags);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003230
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003231 if (cf & CF_DRY_RUN)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003232 set_bit(CONN_DRY_RUN, &connection->flags);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003233
3234 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003235 nc = rcu_dereference(connection->net_conf);
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003236
3237 if (p_proto != nc->wire_protocol) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003238 drbd_err(connection, "incompatible %s settings\n", "protocol");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003239 goto disconnect_rcu_unlock;
3240 }
3241
3242 if (convert_after_sb(p_after_sb_0p) != nc->after_sb_0p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003243 drbd_err(connection, "incompatible %s settings\n", "after-sb-0pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003244 goto disconnect_rcu_unlock;
3245 }
3246
3247 if (convert_after_sb(p_after_sb_1p) != nc->after_sb_1p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003248 drbd_err(connection, "incompatible %s settings\n", "after-sb-1pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003249 goto disconnect_rcu_unlock;
3250 }
3251
3252 if (convert_after_sb(p_after_sb_2p) != nc->after_sb_2p) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003253 drbd_err(connection, "incompatible %s settings\n", "after-sb-2pri");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003254 goto disconnect_rcu_unlock;
3255 }
3256
3257 if (p_discard_my_data && nc->discard_my_data) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003258 drbd_err(connection, "incompatible %s settings\n", "discard-my-data");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003259 goto disconnect_rcu_unlock;
3260 }
3261
3262 if (p_two_primaries != nc->two_primaries) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003263 drbd_err(connection, "incompatible %s settings\n", "allow-two-primaries");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003264 goto disconnect_rcu_unlock;
3265 }
3266
3267 if (strcmp(integrity_alg, nc->integrity_alg)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003268 drbd_err(connection, "incompatible %s settings\n", "data-integrity-alg");
Andreas Gruenbacherfbc12f42011-07-15 17:04:26 +02003269 goto disconnect_rcu_unlock;
3270 }
3271
3272 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003273 }
3274
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003275 if (integrity_alg[0]) {
3276 int hash_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003277
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003278 /*
3279 * We can only change the peer data integrity algorithm
3280 * here. Changing our own data integrity algorithm
3281 * requires that we send a P_PROTOCOL_UPDATE packet at
3282 * the same time; otherwise, the peer has no way to
3283 * tell between which packets the algorithm should
3284 * change.
3285 */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003286
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003287 peer_integrity_tfm = crypto_alloc_hash(integrity_alg, 0, CRYPTO_ALG_ASYNC);
3288 if (!peer_integrity_tfm) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003289 drbd_err(connection, "peer data-integrity-alg %s not supported\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003290 integrity_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003291 goto disconnect;
3292 }
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003293
3294 hash_size = crypto_hash_digestsize(peer_integrity_tfm);
3295 int_dig_in = kmalloc(hash_size, GFP_KERNEL);
3296 int_dig_vv = kmalloc(hash_size, GFP_KERNEL);
3297 if (!(int_dig_in && int_dig_vv)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003298 drbd_err(connection, "Allocation of buffers for data integrity checking failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003299 goto disconnect;
3300 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003301 }
3302
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003303 new_net_conf = kmalloc(sizeof(struct net_conf), GFP_KERNEL);
3304 if (!new_net_conf) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003305 drbd_err(connection, "Allocation of new net_conf failed\n");
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003306 goto disconnect;
3307 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003308
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003309 mutex_lock(&connection->data.mutex);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003310 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003311 old_net_conf = connection->net_conf;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003312 *new_net_conf = *old_net_conf;
3313
3314 new_net_conf->wire_protocol = p_proto;
3315 new_net_conf->after_sb_0p = convert_after_sb(p_after_sb_0p);
3316 new_net_conf->after_sb_1p = convert_after_sb(p_after_sb_1p);
3317 new_net_conf->after_sb_2p = convert_after_sb(p_after_sb_2p);
3318 new_net_conf->two_primaries = p_two_primaries;
3319
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003320 rcu_assign_pointer(connection->net_conf, new_net_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003321 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003322 mutex_unlock(&connection->data.mutex);
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003323
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003324 crypto_free_hash(connection->peer_integrity_tfm);
3325 kfree(connection->int_dig_in);
3326 kfree(connection->int_dig_vv);
3327 connection->peer_integrity_tfm = peer_integrity_tfm;
3328 connection->int_dig_in = int_dig_in;
3329 connection->int_dig_vv = int_dig_vv;
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003330
3331 if (strcmp(old_net_conf->integrity_alg, integrity_alg))
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003332 drbd_info(connection, "peer data-integrity-alg: %s\n",
Andreas Gruenbacher7d4c7822011-07-17 23:06:12 +02003333 integrity_alg[0] ? integrity_alg : "(none)");
3334
3335 synchronize_rcu();
3336 kfree(old_net_conf);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003337 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003338
Philipp Reisner44ed1672011-04-19 17:10:19 +02003339disconnect_rcu_unlock:
3340 rcu_read_unlock();
Philipp Reisnerb411b362009-09-25 16:07:19 -07003341disconnect:
Andreas Gruenbacherb792c352011-07-15 16:48:49 +02003342 crypto_free_hash(peer_integrity_tfm);
Philipp Reisner036b17e2011-05-16 17:38:11 +02003343 kfree(int_dig_in);
3344 kfree(int_dig_vv);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003345 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003346 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003347}
3348
3349/* helper function
3350 * input: alg name, feature name
3351 * return: NULL (alg name was "")
3352 * ERR_PTR(error) if something goes wrong
3353 * or the crypto hash ptr, if it worked out ok. */
Rashika Kheriaf63e6312013-12-19 15:11:09 +05303354static
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003355struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003356 const char *alg, const char *name)
3357{
3358 struct crypto_hash *tfm;
3359
3360 if (!alg[0])
3361 return NULL;
3362
3363 tfm = crypto_alloc_hash(alg, 0, CRYPTO_ALG_ASYNC);
3364 if (IS_ERR(tfm)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003365 drbd_err(device, "Can not allocate \"%s\" as %s (reason: %ld)\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07003366 alg, name, PTR_ERR(tfm));
3367 return tfm;
3368 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003369 return tfm;
3370}
3371
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003372static int ignore_remaining_packet(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003373{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003374 void *buffer = connection->data.rbuf;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003375 int size = pi->size;
3376
3377 while (size) {
3378 int s = min_t(int, size, DRBD_SOCKET_BUFFER_SIZE);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003379 s = drbd_recv(connection, buffer, s);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003380 if (s <= 0) {
3381 if (s < 0)
3382 return s;
3383 break;
3384 }
3385 size -= s;
3386 }
3387 if (size)
3388 return -EIO;
3389 return 0;
3390}
3391
3392/*
3393 * config_unknown_volume - device configuration command for unknown volume
3394 *
3395 * When a device is added to an existing connection, the node on which the
3396 * device is added first will send configuration commands to its peer but the
3397 * peer will not know about the device yet. It will warn and ignore these
3398 * commands. Once the device is added on the second node, the second node will
3399 * send the same device configuration commands, but in the other direction.
3400 *
3401 * (We can also end up here if drbd is misconfigured.)
3402 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003403static int config_unknown_volume(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003404{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02003405 drbd_warn(connection, "%s packet received for volume %u, which is not configured locally\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02003406 cmdname(pi->cmd), pi->vnr);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003407 return ignore_remaining_packet(connection, pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003408}
3409
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003410static int receive_SyncParam(struct drbd_connection *connection, struct packet_info *pi)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003411{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003412 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003413 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003414 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003415 unsigned int header_size, data_size, exp_max_sz;
3416 struct crypto_hash *verify_tfm = NULL;
3417 struct crypto_hash *csums_tfm = NULL;
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003418 struct net_conf *old_net_conf, *new_net_conf = NULL;
Philipp Reisner813472c2011-05-03 16:47:02 +02003419 struct disk_conf *old_disk_conf = NULL, *new_disk_conf = NULL;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003420 const int apv = connection->agreed_pro_version;
Philipp Reisner813472c2011-05-03 16:47:02 +02003421 struct fifo_buffer *old_plan = NULL, *new_plan = NULL;
Philipp Reisner778f2712010-07-06 11:14:00 +02003422 int fifo_size = 0;
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003423 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003424
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003425 peer_device = conn_peer_device(connection, pi->vnr);
3426 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003427 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003428 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003429
3430 exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param)
3431 : apv == 88 ? sizeof(struct p_rs_param)
3432 + SHARED_SECRET_MAX
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003433 : apv <= 94 ? sizeof(struct p_rs_param_89)
3434 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003435
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003436 if (pi->size > exp_max_sz) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003437 drbd_err(device, "SyncParam packet too long: received %u, expected <= %u bytes\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003438 pi->size, exp_max_sz);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003439 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003440 }
3441
3442 if (apv <= 88) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003443 header_size = sizeof(struct p_rs_param);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003444 data_size = pi->size - header_size;
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003445 } else if (apv <= 94) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003446 header_size = sizeof(struct p_rs_param_89);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003447 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003448 D_ASSERT(device, data_size == 0);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003449 } else {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003450 header_size = sizeof(struct p_rs_param_95);
Andreas Gruenbachere2857212011-03-25 00:57:38 +01003451 data_size = pi->size - header_size;
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003452 D_ASSERT(device, data_size == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003453 }
3454
3455 /* initialize verify_alg and csums_alg */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003456 p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003457 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
3458
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003459 err = drbd_recv_all(peer_device->connection, p, header_size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003460 if (err)
3461 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003462
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003463 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003464 old_net_conf = peer_device->connection->net_conf;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003465 if (get_ldev(device)) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003466 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3467 if (!new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003468 put_ldev(device);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003469 mutex_unlock(&connection->resource->conf_update);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003470 drbd_err(device, "Allocation of new disk_conf failed\n");
Philipp Reisner813472c2011-05-03 16:47:02 +02003471 return -ENOMEM;
3472 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003473
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003474 old_disk_conf = device->ldev->disk_conf;
Philipp Reisner813472c2011-05-03 16:47:02 +02003475 *new_disk_conf = *old_disk_conf;
3476
Andreas Gruenbacher6394b932011-05-11 14:29:52 +02003477 new_disk_conf->resync_rate = be32_to_cpu(p->resync_rate);
Philipp Reisner813472c2011-05-03 16:47:02 +02003478 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003479
3480 if (apv >= 88) {
3481 if (apv == 88) {
Philipp Reisner5de73822012-03-28 10:17:32 +02003482 if (data_size > SHARED_SECRET_MAX || data_size == 0) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003483 drbd_err(device, "verify-alg of wrong size, "
Philipp Reisner5de73822012-03-28 10:17:32 +02003484 "peer wants %u, accepting only up to %u byte\n",
3485 data_size, SHARED_SECRET_MAX);
Philipp Reisner813472c2011-05-03 16:47:02 +02003486 err = -EIO;
3487 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003488 }
3489
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003490 err = drbd_recv_all(peer_device->connection, p->verify_alg, data_size);
Philipp Reisner813472c2011-05-03 16:47:02 +02003491 if (err)
3492 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003493 /* we expect NUL terminated string */
3494 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003495 D_ASSERT(device, p->verify_alg[data_size-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003496 p->verify_alg[data_size-1] = 0;
3497
3498 } else /* apv >= 89 */ {
3499 /* we still expect NUL terminated strings */
3500 /* but just in case someone tries to be evil */
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02003501 D_ASSERT(device, p->verify_alg[SHARED_SECRET_MAX-1] == 0);
3502 D_ASSERT(device, p->csums_alg[SHARED_SECRET_MAX-1] == 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003503 p->verify_alg[SHARED_SECRET_MAX-1] = 0;
3504 p->csums_alg[SHARED_SECRET_MAX-1] = 0;
3505 }
3506
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003507 if (strcmp(old_net_conf->verify_alg, p->verify_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003508 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003509 drbd_err(device, "Different verify-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003510 old_net_conf->verify_alg, p->verify_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003511 goto disconnect;
3512 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003513 verify_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003514 p->verify_alg, "verify-alg");
3515 if (IS_ERR(verify_tfm)) {
3516 verify_tfm = NULL;
3517 goto disconnect;
3518 }
3519 }
3520
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003521 if (apv >= 89 && strcmp(old_net_conf->csums_alg, p->csums_alg)) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003522 if (device->state.conn == C_WF_REPORT_PARAMS) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003523 drbd_err(device, "Different csums-alg settings. me=\"%s\" peer=\"%s\"\n",
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003524 old_net_conf->csums_alg, p->csums_alg);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003525 goto disconnect;
3526 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003527 csums_tfm = drbd_crypto_alloc_digest_safe(device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003528 p->csums_alg, "csums-alg");
3529 if (IS_ERR(csums_tfm)) {
3530 csums_tfm = NULL;
3531 goto disconnect;
3532 }
3533 }
3534
Philipp Reisner813472c2011-05-03 16:47:02 +02003535 if (apv > 94 && new_disk_conf) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003536 new_disk_conf->c_plan_ahead = be32_to_cpu(p->c_plan_ahead);
3537 new_disk_conf->c_delay_target = be32_to_cpu(p->c_delay_target);
3538 new_disk_conf->c_fill_target = be32_to_cpu(p->c_fill_target);
3539 new_disk_conf->c_max_rate = be32_to_cpu(p->c_max_rate);
Philipp Reisner778f2712010-07-06 11:14:00 +02003540
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003541 fifo_size = (new_disk_conf->c_plan_ahead * 10 * SLEEP_TIME) / HZ;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003542 if (fifo_size != device->rs_plan_s->size) {
Philipp Reisner813472c2011-05-03 16:47:02 +02003543 new_plan = fifo_alloc(fifo_size);
3544 if (!new_plan) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003545 drbd_err(device, "kmalloc of fifo_buffer failed");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003546 put_ldev(device);
Philipp Reisner778f2712010-07-06 11:14:00 +02003547 goto disconnect;
3548 }
3549 }
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02003550 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003551
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003552 if (verify_tfm || csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003553 new_net_conf = kzalloc(sizeof(struct net_conf), GFP_KERNEL);
3554 if (!new_net_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003555 drbd_err(device, "Allocation of new net_conf failed\n");
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003556 goto disconnect;
3557 }
3558
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003559 *new_net_conf = *old_net_conf;
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003560
3561 if (verify_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003562 strcpy(new_net_conf->verify_alg, p->verify_alg);
3563 new_net_conf->verify_alg_len = strlen(p->verify_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003564 crypto_free_hash(peer_device->connection->verify_tfm);
3565 peer_device->connection->verify_tfm = verify_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003566 drbd_info(device, "using verify-alg: \"%s\"\n", p->verify_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003567 }
3568 if (csums_tfm) {
Philipp Reisner2ec91e02011-05-03 14:58:00 +02003569 strcpy(new_net_conf->csums_alg, p->csums_alg);
3570 new_net_conf->csums_alg_len = strlen(p->csums_alg) + 1;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003571 crypto_free_hash(peer_device->connection->csums_tfm);
3572 peer_device->connection->csums_tfm = csums_tfm;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003573 drbd_info(device, "using csums-alg: \"%s\"\n", p->csums_alg);
Philipp Reisner91fd4da2011-04-20 17:47:29 +02003574 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003575 rcu_assign_pointer(connection->net_conf, new_net_conf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003576 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003577 }
3578
Philipp Reisner813472c2011-05-03 16:47:02 +02003579 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003580 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
3581 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003582 }
Philipp Reisner813472c2011-05-03 16:47:02 +02003583
3584 if (new_plan) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003585 old_plan = device->rs_plan_s;
3586 rcu_assign_pointer(device->rs_plan_s, new_plan);
Philipp Reisner813472c2011-05-03 16:47:02 +02003587 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003588
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003589 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003590 synchronize_rcu();
3591 if (new_net_conf)
3592 kfree(old_net_conf);
3593 kfree(old_disk_conf);
Philipp Reisner813472c2011-05-03 16:47:02 +02003594 kfree(old_plan);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003595
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003596 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003597
Philipp Reisner813472c2011-05-03 16:47:02 +02003598reconnect:
3599 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003600 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003601 kfree(new_disk_conf);
3602 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003603 mutex_unlock(&connection->resource->conf_update);
Philipp Reisner813472c2011-05-03 16:47:02 +02003604 return -EIO;
3605
Philipp Reisnerb411b362009-09-25 16:07:19 -07003606disconnect:
Philipp Reisner813472c2011-05-03 16:47:02 +02003607 kfree(new_plan);
3608 if (new_disk_conf) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003609 put_ldev(device);
Philipp Reisner813472c2011-05-03 16:47:02 +02003610 kfree(new_disk_conf);
3611 }
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003612 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003613 /* just for completeness: actually not needed,
3614 * as this is not reached if csums_tfm was ok. */
3615 crypto_free_hash(csums_tfm);
3616 /* but free the verify_tfm again, if csums_tfm did not work out */
3617 crypto_free_hash(verify_tfm);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003618 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003619 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003620}
3621
Philipp Reisnerb411b362009-09-25 16:07:19 -07003622/* warn if the arguments differ by more than 12.5% */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003623static void warn_if_differ_considerably(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003624 const char *s, sector_t a, sector_t b)
3625{
3626 sector_t d;
3627 if (a == 0 || b == 0)
3628 return;
3629 d = (a > b) ? (a - b) : (b - a);
3630 if (d > (a>>3) || d > (b>>3))
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003631 drbd_warn(device, "Considerable difference in %s: %llus vs. %llus\n", s,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003632 (unsigned long long)a, (unsigned long long)b);
3633}
3634
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003635static int receive_sizes(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003636{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003637 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003638 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003639 struct p_sizes *p = pi->data;
Philipp Reisnere96c9632013-06-25 16:50:07 +02003640 enum determine_dev_size dd = DS_UNCHANGED;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003641 sector_t p_size, p_usize, my_usize;
3642 int ldsc = 0; /* local disk size changed */
Philipp Reisnere89b5912010-03-24 17:11:33 +01003643 enum dds_flags ddsf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003644
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003645 peer_device = conn_peer_device(connection, pi->vnr);
3646 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003647 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003648 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003649
Philipp Reisnerb411b362009-09-25 16:07:19 -07003650 p_size = be64_to_cpu(p->d_size);
3651 p_usize = be64_to_cpu(p->u_size);
3652
Philipp Reisnerb411b362009-09-25 16:07:19 -07003653 /* just store the peer's disk size for now.
3654 * we still need to figure out whether we accept that. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003655 device->p_size = p_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003656
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003657 if (get_ldev(device)) {
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003658 rcu_read_lock();
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003659 my_usize = rcu_dereference(device->ldev->disk_conf)->disk_size;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003660 rcu_read_unlock();
3661
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003662 warn_if_differ_considerably(device, "lower level device sizes",
3663 p_size, drbd_get_max_capacity(device->ldev));
3664 warn_if_differ_considerably(device, "user requested size",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003665 p_usize, my_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003666
3667 /* if this is the first connect, or an otherwise expected
3668 * param exchange, choose the minimum */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003669 if (device->state.conn == C_WF_REPORT_PARAMS)
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003670 p_usize = min_not_zero(my_usize, p_usize);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003671
3672 /* Never shrink a device with usable data during connect.
3673 But allow online shrinking if we are connected. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003674 if (drbd_new_dev_size(device, device->ldev, p_usize, 0) <
3675 drbd_get_capacity(device->this_bdev) &&
3676 device->state.disk >= D_OUTDATED &&
3677 device->state.conn < C_CONNECTED) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003678 drbd_err(device, "The peer's disk size is too small!\n");
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003679 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003680 put_ldev(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003681 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003682 }
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003683
3684 if (my_usize != p_usize) {
3685 struct disk_conf *old_disk_conf, *new_disk_conf = NULL;
3686
3687 new_disk_conf = kzalloc(sizeof(struct disk_conf), GFP_KERNEL);
3688 if (!new_disk_conf) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003689 drbd_err(device, "Allocation of new disk_conf failed\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003690 put_ldev(device);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003691 return -ENOMEM;
3692 }
3693
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003694 mutex_lock(&connection->resource->conf_update);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003695 old_disk_conf = device->ldev->disk_conf;
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003696 *new_disk_conf = *old_disk_conf;
3697 new_disk_conf->disk_size = p_usize;
3698
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003699 rcu_assign_pointer(device->ldev->disk_conf, new_disk_conf);
Andreas Gruenbacher05008132011-07-07 14:19:42 +02003700 mutex_unlock(&connection->resource->conf_update);
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003701 synchronize_rcu();
3702 kfree(old_disk_conf);
3703
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003704 drbd_info(device, "Peer sets u_size to %lu sectors\n",
Philipp Reisnerdaeda1c2011-05-03 15:00:55 +02003705 (unsigned long)my_usize);
3706 }
3707
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003708 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003709 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003710
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02003711 device->peer_max_bio_size = be32_to_cpu(p->max_bio_size);
3712 drbd_reconsider_max_bio_size(device);
3713 /* Leave drbd_reconsider_max_bio_size() before drbd_determine_dev_size().
3714 In case we cleared the QUEUE_FLAG_DISCARD from our queue in
3715 drbd_reconsider_max_bio_size(), we can be sure that after
3716 drbd_determine_dev_size() no REQ_DISCARDs are in the queue. */
3717
Philipp Reisnere89b5912010-03-24 17:11:33 +01003718 ddsf = be16_to_cpu(p->dds_flags);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003719 if (get_ldev(device)) {
3720 dd = drbd_determine_dev_size(device, ddsf, NULL);
3721 put_ldev(device);
Philipp Reisnere96c9632013-06-25 16:50:07 +02003722 if (dd == DS_ERROR)
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003723 return -EIO;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003724 drbd_md_sync(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003725 } else {
3726 /* I am diskless, need to accept the peer's size. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003727 drbd_set_my_capacity(device, p_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003728 }
3729
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003730 if (get_ldev(device)) {
3731 if (device->ldev->known_size != drbd_get_capacity(device->ldev->backing_bdev)) {
3732 device->ldev->known_size = drbd_get_capacity(device->ldev->backing_bdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003733 ldsc = 1;
3734 }
3735
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003736 put_ldev(device);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003737 }
3738
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003739 if (device->state.conn > C_WF_REPORT_PARAMS) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003740 if (be64_to_cpu(p->c_size) !=
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003741 drbd_get_capacity(device->this_bdev) || ldsc) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003742 /* we have different sizes, probably peer
3743 * needs to know my new size... */
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02003744 drbd_send_sizes(peer_device, 0, ddsf);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003745 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003746 if (test_and_clear_bit(RESIZE_PENDING, &device->flags) ||
3747 (dd == DS_GREW && device->state.conn == C_CONNECTED)) {
3748 if (device->state.pdsk >= D_INCONSISTENT &&
3749 device->state.disk >= D_INCONSISTENT) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01003750 if (ddsf & DDSF_NO_RESYNC)
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003751 drbd_info(device, "Resync of new storage suppressed with --assume-clean\n");
Philipp Reisnere89b5912010-03-24 17:11:33 +01003752 else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003753 resync_after_online_grow(device);
Philipp Reisnere89b5912010-03-24 17:11:33 +01003754 } else
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003755 set_bit(RESYNC_AFTER_NEG, &device->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003756 }
3757 }
3758
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003759 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003760}
3761
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003762static int receive_uuids(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003763{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003764 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003765 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02003766 struct p_uuids *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003767 u64 *p_uuid;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003768 int i, updated_uuids = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003769
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003770 peer_device = conn_peer_device(connection, pi->vnr);
3771 if (!peer_device)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02003772 return config_unknown_volume(connection, pi);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003773 device = peer_device->device;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01003774
Philipp Reisnerb411b362009-09-25 16:07:19 -07003775 p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO);
Jing Wang063eacf2012-10-25 15:00:56 +08003776 if (!p_uuid) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003777 drbd_err(device, "kmalloc of p_uuid failed\n");
Jing Wang063eacf2012-10-25 15:00:56 +08003778 return false;
3779 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003780
3781 for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++)
3782 p_uuid[i] = be64_to_cpu(p->uuid[i]);
3783
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003784 kfree(device->p_uuid);
3785 device->p_uuid = p_uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003786
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003787 if (device->state.conn < C_CONNECTED &&
3788 device->state.disk < D_INCONSISTENT &&
3789 device->state.role == R_PRIMARY &&
3790 (device->ed_uuid & ~((u64)1)) != (p_uuid[UI_CURRENT] & ~((u64)1))) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003791 drbd_err(device, "Can only connect to data with current UUID=%016llX\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003792 (unsigned long long)device->ed_uuid);
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003793 conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003794 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795 }
3796
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003797 if (get_ldev(device)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003798 int skip_initial_sync =
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003799 device->state.conn == C_CONNECTED &&
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02003800 peer_device->connection->agreed_pro_version >= 90 &&
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003801 device->ldev->md.uuid[UI_CURRENT] == UUID_JUST_CREATED &&
Philipp Reisnerb411b362009-09-25 16:07:19 -07003802 (p_uuid[UI_FLAGS] & 8);
3803 if (skip_initial_sync) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02003804 drbd_info(device, "Accepted new current UUID, preparing to skip initial sync\n");
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003805 drbd_bitmap_io(device, &drbd_bmio_clear_n_write,
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01003806 "clear_n_write from receive_uuids",
3807 BM_LOCKED_TEST_ALLOWED);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003808 _drbd_uuid_set(device, UI_CURRENT, p_uuid[UI_CURRENT]);
3809 _drbd_uuid_set(device, UI_BITMAP, 0);
3810 _drbd_set_state(_NS2(device, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811 CS_VERBOSE, NULL);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003812 drbd_md_sync(device);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003813 updated_uuids = 1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003814 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003815 put_ldev(device);
3816 } else if (device->state.disk < D_INCONSISTENT &&
3817 device->state.role == R_PRIMARY) {
Philipp Reisner18a50fa2010-06-21 14:14:15 +02003818 /* I am a diskless primary, the peer just created a new current UUID
3819 for me. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003820 updated_uuids = drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003821 }
3822
3823 /* Before we test for the disk state, we should wait until an eventually
3824 ongoing cluster wide state change is finished. That is important if
3825 we are primary and are detaching from our disk. We need to see the
3826 new disk state... */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003827 mutex_lock(device->state_mutex);
3828 mutex_unlock(device->state_mutex);
3829 if (device->state.conn >= C_CONNECTED && device->state.disk < D_INCONSISTENT)
3830 updated_uuids |= drbd_set_ed_uuid(device, p_uuid[UI_CURRENT]);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003831
3832 if (updated_uuids)
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02003833 drbd_print_uuids(device, "receiver updated UUIDs to");
Philipp Reisnerb411b362009-09-25 16:07:19 -07003834
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01003835 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836}
3837
3838/**
3839 * convert_state() - Converts the peer's view of the cluster state to our point of view
3840 * @ps: The state as seen by the peer.
3841 */
3842static union drbd_state convert_state(union drbd_state ps)
3843{
3844 union drbd_state ms;
3845
3846 static enum drbd_conns c_tab[] = {
Philipp Reisner369bea62011-07-06 23:04:44 +02003847 [C_WF_REPORT_PARAMS] = C_WF_REPORT_PARAMS,
Philipp Reisnerb411b362009-09-25 16:07:19 -07003848 [C_CONNECTED] = C_CONNECTED,
3849
3850 [C_STARTING_SYNC_S] = C_STARTING_SYNC_T,
3851 [C_STARTING_SYNC_T] = C_STARTING_SYNC_S,
3852 [C_DISCONNECTING] = C_TEAR_DOWN, /* C_NETWORK_FAILURE, */
3853 [C_VERIFY_S] = C_VERIFY_T,
3854 [C_MASK] = C_MASK,
3855 };
3856
3857 ms.i = ps.i;
3858
3859 ms.conn = c_tab[ps.conn];
3860 ms.peer = ps.role;
3861 ms.role = ps.peer;
3862 ms.pdsk = ps.disk;
3863 ms.disk = ps.pdsk;
3864 ms.peer_isp = (ps.aftr_isp | ps.user_isp);
3865
3866 return ms;
3867}
3868
/* Handle a state change request sent by the peer for a single volume.
 * The request arrives expressed from the peer's point of view; it is
 * mirrored with convert_state() before being applied, and the result
 * is reported back via drbd_send_sr_reply(). */
static int receive_req_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	/* Resolve the volume this packet addresses; bail out (and let the
	 * caller tear down the connection) if it is unknown. */
	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* mask selects which state fields to change, val carries the new
	 * values; both arrive in network byte order. */
	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If we are the conflict resolver and a local state change is
	 * already in progress (state_mutex held), reject the concurrent
	 * request rather than racing with our own change. */
	if (test_bit(RESOLVE_CONFLICTS, &peer_device->connection->flags) &&
	    mutex_is_locked(device->state_mutex)) {
		drbd_send_sr_reply(peer_device, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* Translate the peer's view into ours (role<->peer, disk<->pdsk, ...). */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = drbd_change_state(device, CS_VERBOSE, mask, val);
	drbd_send_sr_reply(peer_device, rv);

	/* Persist any resulting meta data changes. */
	drbd_md_sync(device);

	return 0;
}
3901
/* Handle a connection-wide (as opposed to per-volume) state change
 * request from the peer.  Same mirroring logic as receive_req_state(),
 * but operating on the connection state and replying with
 * conn_send_sr_reply(). */
static int receive_req_conn_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_req_state *p = pi->data;
	union drbd_state mask, val;
	enum drbd_state_rv rv;

	/* Field selector and new values, converted from network byte order. */
	mask.i = be32_to_cpu(p->mask);
	val.i = be32_to_cpu(p->val);

	/* If we are the conflict resolver and a local connection state
	 * change is already in flight, refuse the concurrent request. */
	if (test_bit(RESOLVE_CONFLICTS, &connection->flags) &&
	    mutex_is_locked(&connection->cstate_mutex)) {
		conn_send_sr_reply(connection, SS_CONCURRENT_ST_CHG);
		return 0;
	}

	/* Translate the peer's view of the request into our own. */
	mask = convert_state(mask);
	val = convert_state(val);

	rv = conn_request_state(connection, mask, val, CS_VERBOSE | CS_LOCAL_ONLY | CS_IGN_OUTD_FAIL);
	conn_send_sr_reply(connection, rv);

	return 0;
}
3925
/* Process a state packet from the peer: reconcile the peer's reported
 * state with our local state, possibly kick off a resync handshake, and
 * commit the combined state under the request lock.  Returns 0 on
 * success, -ECONNRESET if the connection is already being torn down,
 * or -EIO on fatal disagreement (which disconnects). */
static int receive_state(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_state *p = pi->data;
	union drbd_state os, ns, peer_state;
	enum drbd_disk_state real_peer_disk;
	enum chg_state_flags cs_flags;
	int rv;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return config_unknown_volume(connection, pi);
	device = peer_device->device;

	peer_state.i = be32_to_cpu(p->state);

	/* While the peer is still negotiating its disk, derive an effective
	 * disk state from the inconsistent flag in its UUID flags. */
	real_peer_disk = peer_state.disk;
	if (peer_state.disk == D_NEGOTIATING) {
		real_peer_disk = device->p_uuid[UI_FLAGS] & 4 ? D_INCONSISTENT : D_CONSISTENT;
		drbd_info(device, "real peer disk state = %s\n", drbd_disk_str(real_peer_disk));
	}

	spin_lock_irq(&device->resource->req_lock);
 retry:
	/* Snapshot the current state; if it changes underneath us before we
	 * commit below, we jump back here and re-evaluate. */
	os = ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	/* If some other part of the code (asender thread, timeout)
	 * already decided to close the connection again,
	 * we must not "re-establish" it here. */
	if (os.conn <= C_TEAR_DOWN)
		return -ECONNRESET;

	/* If this is the "end of sync" confirmation, usually the peer disk
	 * transitions from D_INCONSISTENT to D_UP_TO_DATE. For empty (0 bits
	 * set) resync started in PausedSyncT, or if the timing of pause-/
	 * unpause-sync events has been "just right", the peer disk may
	 * transition from D_CONSISTENT to D_UP_TO_DATE as well.
	 */
	if ((os.pdsk == D_INCONSISTENT || os.pdsk == D_CONSISTENT) &&
	    real_peer_disk == D_UP_TO_DATE &&
	    os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) {
		/* If we are (becoming) SyncSource, but peer is still in sync
		 * preparation, ignore its uptodate-ness to avoid flapping, it
		 * will change to inconsistent once the peer reaches active
		 * syncing states.
		 * It may have changed syncer-paused flags, however, so we
		 * cannot ignore this completely. */
		if (peer_state.conn > C_CONNECTED &&
		    peer_state.conn < C_SYNC_SOURCE)
			real_peer_disk = D_INCONSISTENT;

		/* if peer_state changes to connected at the same time,
		 * it explicitly notifies us that it finished resync.
		 * Maybe we should finish it up, too? */
		else if (os.conn >= C_SYNC_SOURCE &&
			 peer_state.conn == C_CONNECTED) {
			if (drbd_bm_total_weight(device) <= device->rs_failed)
				drbd_resync_finished(device);
			return 0;
		}
	}

	/* explicit verify finished notification, stop sector reached. */
	if (os.conn == C_VERIFY_T && os.disk == D_UP_TO_DATE &&
	    peer_state.conn == C_CONNECTED && real_peer_disk == D_UP_TO_DATE) {
		ov_out_of_sync_print(device);
		drbd_resync_finished(device);
		return 0;
	}

	/* peer says his disk is inconsistent, while we think it is uptodate,
	 * and this happens while the peer still thinks we have a sync going on,
	 * but we think we are already done with the sync.
	 * We ignore this to avoid flapping pdsk.
	 * This should not happen, if the peer is a recent version of drbd. */
	if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT &&
	    os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE)
		real_peer_disk = D_UP_TO_DATE;

	/* Receiving a state packet while waiting for parameters completes
	 * the connection establishment. */
	if (ns.conn == C_WF_REPORT_PARAMS)
		ns.conn = C_CONNECTED;

	/* Peer being Ahead implies we are the Behind side. */
	if (peer_state.conn == C_AHEAD)
		ns.conn = C_BEHIND;

	if (device->p_uuid && peer_state.disk >= D_NEGOTIATING &&
	    get_ldev_if_state(device, D_NEGOTIATING)) {
		int cr; /* consider resync */

		/* if we established a new connection */
		cr  = (os.conn < C_CONNECTED);
		/* if we had an established connection
		 * and one of the nodes newly attaches a disk */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.disk == D_NEGOTIATING ||
			os.disk == D_NEGOTIATING));
		/* if we have both been inconsistent, and the peer has been
		 * forced to be UpToDate with --overwrite-data */
		cr |= test_bit(CONSIDER_RESYNC, &device->flags);
		/* if we had been plain connected, and the admin requested to
		 * start a sync by "invalidate" or "invalidate-remote" */
		cr |= (os.conn == C_CONNECTED &&
		       (peer_state.conn >= C_STARTING_SYNC_S &&
			peer_state.conn <= C_WF_BITMAP_T));

		if (cr)
			ns.conn = drbd_sync_handshake(peer_device, peer_state.role, real_peer_disk);

		put_ldev(device);
		/* C_MASK signals that the handshake could not agree on a
		 * resync direction; resolve or disconnect. */
		if (ns.conn == C_MASK) {
			ns.conn = C_CONNECTED;
			if (device->state.disk == D_NEGOTIATING) {
				drbd_force_state(device, NS(disk, D_FAILED));
			} else if (peer_state.disk == D_NEGOTIATING) {
				drbd_err(device, "Disk attach process on the peer node was aborted.\n");
				peer_state.disk = D_DISKLESS;
				real_peer_disk = D_DISKLESS;
			} else {
				/* On a dry-run handshake failure, just fail quietly. */
				if (test_and_clear_bit(CONN_DRY_RUN, &peer_device->connection->flags))
					return -EIO;
				D_ASSERT(device, os.conn == C_WF_REPORT_PARAMS);
				conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
				return -EIO;
			}
		}
	}

	spin_lock_irq(&device->resource->req_lock);
	/* State moved while we were unlocked: redo the evaluation. */
	if (os.i != drbd_read_state(device).i)
		goto retry;
	clear_bit(CONSIDER_RESYNC, &device->flags);
	ns.peer = peer_state.role;
	ns.pdsk = real_peer_disk;
	ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp);
	if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING)
		ns.disk = device->new_state_tmp.disk;
	cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD);
	if (ns.pdsk == D_CONSISTENT && drbd_suspended(device) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED &&
	    test_bit(NEW_CUR_UUID, &device->flags)) {
		/* Do not allow tl_restart(RESEND) for a rebooted peer. We can only allow this
		   for temporal network outages! */
		spin_unlock_irq(&device->resource->req_lock);
		drbd_err(device, "Aborting Connect, can not thaw IO with an only Consistent peer\n");
		tl_clear(peer_device->connection);
		drbd_uuid_new_current(device);
		clear_bit(NEW_CUR_UUID, &device->flags);
		conn_request_state(peer_device->connection, NS2(conn, C_PROTOCOL_ERROR, susp, 0), CS_HARD);
		return -EIO;
	}
	rv = _drbd_set_state(device, ns, cs_flags, NULL);
	/* Re-read: _drbd_set_state() may have adjusted the state further. */
	ns = drbd_read_state(device);
	spin_unlock_irq(&device->resource->req_lock);

	if (rv < SS_SUCCESS) {
		conn_request_state(peer_device->connection, NS(conn, C_DISCONNECTING), CS_HARD);
		return -EIO;
	}

	if (os.conn > C_WF_REPORT_PARAMS) {
		if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED &&
		    peer_state.disk != D_NEGOTIATING ) {
			/* we want resync, peer has not yet decided to sync... */
			/* Nowadays only used when forcing a node into primary role and
			   setting its disk to UpToDate with that */
			drbd_send_uuids(peer_device);
			drbd_send_current_state(peer_device);
		}
	}

	clear_bit(DISCARD_MY_DATA, &device->flags);

	drbd_md_sync(device); /* update connected indicator, la_size_sect, ... */

	return 0;
}
4103
/* Handle a sync UUID packet: adopt the peer's current UUID, clear the
 * bitmap UUID, and start resync as SyncTarget.  Waits first until the
 * local state settles into one where acting on the packet is valid. */
static int receive_sync_uuid(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_rs_uuid *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	/* Wait until we are either ready for the sync UUID, or in a state
	 * where the packet has become moot (disconnecting/detaching). */
	wait_event(device->misc_wait,
		   device->state.conn == C_WF_SYNC_UUID ||
		   device->state.conn == C_BEHIND ||
		   device->state.conn < C_CONNECTED ||
		   device->state.disk < D_NEGOTIATING);

	/* D_ASSERT(device, device->state.conn == C_WF_SYNC_UUID ); */

	/* Here the _drbd_uuid_ functions are right, current should
	   _not_ be rotated into the history */
	if (get_ldev_if_state(device, D_NEGOTIATING)) {
		_drbd_uuid_set(device, UI_CURRENT, be64_to_cpu(p->uuid));
		_drbd_uuid_set(device, UI_BITMAP, 0UL);

		drbd_print_uuids(device, "updated sync uuid");
		drbd_start_resync(device, C_SYNC_TARGET);

		put_ldev(device);
	} else
		/* No usable local disk (anymore); nothing we can apply it to. */
		drbd_err(device, "Ignoring SyncUUID packet!\n");

	return 0;
}
4138
/**
 * receive_bitmap_plain - receive one chunk of an uncompressed bitmap
 * @peer_device: peer device the bitmap data belongs to
 * @size: payload size announced in the packet header
 * @p: receive buffer for the bitmap words
 * @c: bitmap transfer context (tracks word/bit progress)
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
receive_bitmap_plain(struct drbd_peer_device *peer_device, unsigned int size,
		     unsigned long *p, struct bm_xfer_ctx *c)
{
	/* Maximum payload per packet: socket buffer minus the header. */
	unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE -
				 drbd_header_size(peer_device->connection);
	/* Words we expect in this chunk: a full packet's worth, or whatever
	 * remains of the bitmap. */
	unsigned int num_words = min_t(size_t, data_size / sizeof(*p),
				       c->bm_words - c->word_offset);
	unsigned int want = num_words * sizeof(*p);
	int err;

	/* The peer must send exactly the chunk size we computed. */
	if (want != size) {
		drbd_err(peer_device, "%s:want (%u) != size (%u)\n", __func__, want, size);
		return -EIO;
	}
	if (want == 0)
		return 0;
	err = drbd_recv_all(peer_device->connection, p, want);
	if (err)
		return err;

	/* Merge the received little-endian words into our bitmap. */
	drbd_bm_merge_lel(peer_device->device, c->word_offset, num_words, p);

	/* Advance the transfer context; clamp the bit offset at the end. */
	c->word_offset += num_words;
	c->bit_offset = c->word_offset * BITS_PER_LONG;
	if (c->bit_offset > c->bm_bits)
		c->bit_offset = c->bm_bits;

	return 1;
}
4175
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004176static enum drbd_bitmap_code dcbp_get_code(struct p_compressed_bm *p)
4177{
4178 return (enum drbd_bitmap_code)(p->encoding & 0x0f);
4179}
4180
4181static int dcbp_get_start(struct p_compressed_bm *p)
4182{
4183 return (p->encoding & 0x80) != 0;
4184}
4185
4186static int dcbp_get_pad_bits(struct p_compressed_bm *p)
4187{
4188 return (p->encoding >> 4) & 0x7;
4189}
4190
/**
 * recv_bm_rle_bits - decode one RLE-compressed bitmap chunk
 * @peer_device: peer device whose bitmap is being received
 * @p: compressed bitmap packet (header plus VLI bit stream in p->code)
 * @c: bitmap transfer context (tracks bit/word progress)
 * @len: length of the VLI bit stream in bytes
 *
 * Decodes variable-length-integer encoded run lengths; runs alternate
 * between cleared and set, starting with the value given in the packet
 * header.  Set runs are applied to the bitmap.
 *
 * Return 0 when done, 1 when another iteration is needed, and a negative error
 * code upon failure.
 */
static int
recv_bm_rle_bits(struct drbd_peer_device *peer_device,
		struct p_compressed_bm *p,
		 struct bm_xfer_ctx *c,
		 unsigned int len)
{
	struct bitstream bs;
	u64 look_ahead;	/* sliding 64-bit decode window */
	u64 rl;		/* current run length, in bits */
	u64 tmp;
	unsigned long s = c->bit_offset;	/* current bit position */
	unsigned long e;			/* end of the current run */
	int toggle = dcbp_get_start(p);		/* value of the current run */
	int have;	/* valid bits currently in look_ahead */
	int bits;

	bitstream_init(&bs, p->code, len, dcbp_get_pad_bits(p));

	/* Prime the look-ahead window with up to 64 bits. */
	bits = bitstream_get_bits(&bs, &look_ahead, 64);
	if (bits < 0)
		return -EIO;

	for (have = bits; have > 0; s += rl, toggle = !toggle) {
		bits = vli_decode_bits(&rl, look_ahead);
		if (bits <= 0)
			return -EIO;

		if (toggle) {
			e = s + rl -1;
			/* Runs must never extend past the bitmap. */
			if (e >= c->bm_bits) {
				drbd_err(peer_device, "bitmap overflow (e:%lu) while decoding bm RLE packet\n", e);
				return -EIO;
			}
			_drbd_bm_set_bits(peer_device->device, s, e);
		}

		/* The decoded code must fit in the bits we actually had. */
		if (have < bits) {
			drbd_err(peer_device, "bitmap decoding error: h:%d b:%d la:0x%08llx l:%u/%u\n",
				have, bits, look_ahead,
				(unsigned int)(bs.cur.b - p->code),
				(unsigned int)bs.buf_len);
			return -EIO;
		}
		/* if we consumed all 64 bits, assign 0; >> 64 is "undefined"; */
		if (likely(bits < 64))
			look_ahead >>= bits;
		else
			look_ahead = 0;
		have -= bits;

		/* Refill the look-ahead window from the stream. */
		bits = bitstream_get_bits(&bs, &tmp, 64 - have);
		if (bits < 0)
			return -EIO;
		look_ahead |= tmp << have;
		have += bits;
	}

	/* Publish progress and translate the bit offset to a word offset. */
	c->bit_offset = s;
	bm_xfer_ctx_bit_to_word_offset(c);

	/* 1: more chunks expected; 0: whole bitmap received. */
	return (s != c->bm_bits);
}
4259
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004260/**
4261 * decode_bitmap_c
4262 *
4263 * Return 0 when done, 1 when another iteration is needed, and a negative error
4264 * code upon failure.
4265 */
4266static int
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004267decode_bitmap_c(struct drbd_peer_device *peer_device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004268 struct p_compressed_bm *p,
Philipp Reisnerc6d25cf2011-01-19 16:13:06 +01004269 struct bm_xfer_ctx *c,
4270 unsigned int len)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004271{
Andreas Gruenbachera02d1242011-03-22 17:20:45 +01004272 if (dcbp_get_code(p) == RLE_VLI_Bits)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004273 return recv_bm_rle_bits(peer_device, p, c, len - sizeof(*p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004274
4275 /* other variants had been implemented for evaluation,
4276 * but have been dropped as this one turned out to be "best"
4277 * during all our tests. */
4278
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004279 drbd_err(peer_device, "receive_bitmap_c: unknown encoding %u\n", p->encoding);
4280 conn_request_state(peer_device->connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004281 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004282}
4283
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004284void INFO_bm_xfer_stats(struct drbd_device *device,
Philipp Reisnerb411b362009-09-25 16:07:19 -07004285 const char *direction, struct bm_xfer_ctx *c)
4286{
4287 /* what would it take to transfer it "plaintext" */
Andreas Gruenbachera6b32bc2011-05-31 14:33:49 +02004288 unsigned int header_size = drbd_header_size(first_peer_device(device)->connection);
Andreas Gruenbacher50d0b1a2011-03-30 11:53:51 +02004289 unsigned int data_size = DRBD_SOCKET_BUFFER_SIZE - header_size;
4290 unsigned int plain =
4291 header_size * (DIV_ROUND_UP(c->bm_words, data_size) + 1) +
4292 c->bm_words * sizeof(unsigned long);
4293 unsigned int total = c->bytes[0] + c->bytes[1];
4294 unsigned int r;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004295
4296 /* total can not be zero. but just in case: */
4297 if (total == 0)
4298 return;
4299
4300 /* don't report if not compressed */
4301 if (total >= plain)
4302 return;
4303
4304 /* total < plain. check for overflow, still */
4305 r = (total > UINT_MAX/1000) ? (total / (plain/1000))
4306 : (1000 * total / plain);
4307
4308 if (r > 1000)
4309 r = 1000;
4310
4311 r = 1000 - r;
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004312 drbd_info(device, "%s bitmap stats [Bytes(packets)]: plain %u(%u), RLE %u(%u), "
Philipp Reisnerb411b362009-09-25 16:07:19 -07004313 "total %u; compression: %u.%u%%\n",
4314 direction,
4315 c->bytes[1], c->packets[1],
4316 c->bytes[0], c->packets[0],
4317 total, r/10, r % 10);
4318}
4319
4320/* Since we are processing the bitfield from lower addresses to higher,
4321 it does not matter if the process it in 32 bit chunks or 64 bit
4322 chunks as long as it is little endian. (Understand it as byte stream,
4323 beginning with the lowest byte...) If we would use big endian
4324 we would need to process it from the highest address to the lowest,
4325 in order to be agnostic to the 32 vs 64 bits issue.
4326
4327 returns 0 on failure, 1 if we successfully received it. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004328static int receive_bitmap(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004329{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004330 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004331 struct drbd_device *device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004332 struct bm_xfer_ctx c;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004333 int err;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004334
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004335 peer_device = conn_peer_device(connection, pi->vnr);
4336 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004337 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004338 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004339
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004340 drbd_bm_lock(device, "receive bitmap", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004341 /* you are supposed to send additional out-of-sync information
4342 * if you actually set bits during this phase */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004343
Philipp Reisnerb411b362009-09-25 16:07:19 -07004344 c = (struct bm_xfer_ctx) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004345 .bm_bits = drbd_bm_bits(device),
4346 .bm_words = drbd_bm_words(device),
Philipp Reisnerb411b362009-09-25 16:07:19 -07004347 };
4348
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004349 for(;;) {
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004350 if (pi->cmd == P_BITMAP)
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004351 err = receive_bitmap_plain(peer_device, pi->size, pi->data, &c);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004352 else if (pi->cmd == P_COMPRESSED_BITMAP) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004353 /* MAYBE: sanity check that we speak proto >= 90,
4354 * and the feature is enabled! */
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004355 struct p_compressed_bm *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004356
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004357 if (pi->size > DRBD_SOCKET_BUFFER_SIZE - drbd_header_size(connection)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004358 drbd_err(device, "ReportCBitmap packet too large\n");
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004359 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004360 goto out;
4361 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004362 if (pi->size <= sizeof(*p)) {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004363 drbd_err(device, "ReportCBitmap packet too small (l:%u)\n", pi->size);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004364 err = -EIO;
Andreas Gruenbacher78fcbda2010-12-10 22:18:27 +01004365 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004366 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004367 err = drbd_recv_all(peer_device->connection, p, pi->size);
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004368 if (err)
4369 goto out;
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02004370 err = decode_bitmap_c(peer_device, p, &c, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004371 } else {
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004372 drbd_warn(device, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", pi->cmd);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004373 err = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004374 goto out;
4375 }
4376
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004377 c.packets[pi->cmd == P_BITMAP]++;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004378 c.bytes[pi->cmd == P_BITMAP] += drbd_header_size(connection) + pi->size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004379
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004380 if (err <= 0) {
4381 if (err < 0)
4382 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004383 break;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004384 }
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004385 err = drbd_recv_header(peer_device->connection, pi);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004386 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004387 goto out;
Andreas Gruenbacher2c464072010-12-11 21:53:12 +01004388 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004389
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004390 INFO_bm_xfer_stats(device, "receive", &c);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004391
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004392 if (device->state.conn == C_WF_BITMAP_T) {
Andreas Gruenbacherde1f8e42010-12-10 21:04:00 +01004393 enum drbd_state_rv rv;
4394
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004395 err = drbd_send_bitmap(device);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004396 if (err)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004397 goto out;
4398 /* Omit CS_ORDERED with this state transition to avoid deadlocks. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004399 rv = _drbd_request_state(device, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02004400 D_ASSERT(device, rv == SS_SUCCESS);
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004401 } else if (device->state.conn != C_WF_BITMAP_S) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004402 /* admin may have requested C_DISCONNECTING,
4403 * other threads may have noticed network errors */
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004404 drbd_info(device, "unexpected cstate (%s) in receive_bitmap\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004405 drbd_conn_str(device->state.conn));
Philipp Reisnerb411b362009-09-25 16:07:19 -07004406 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004407 err = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004408
Philipp Reisnerb411b362009-09-25 16:07:19 -07004409 out:
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004410 drbd_bm_unlock(device);
4411 if (!err && device->state.conn == C_WF_BITMAP_S)
4412 drbd_start_resync(device, C_SYNC_SOURCE);
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004413 return err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004414}
4415
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004416static int receive_skip(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004417{
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004418 drbd_warn(connection, "skipping unknown optional packet type %d, l: %d!\n",
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004419 pi->cmd, pi->size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004420
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004421 return ignore_remaining_packet(connection, pi);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004422}
4423
/* Handler for P_UNPLUG_REMOTE: the packet carries no payload, it only
 * marks a point in the data stream.  Always succeeds (returns 0). */
static int receive_UnplugRemote(struct drbd_connection *connection, struct packet_info *pi)
{
	/* Make sure we've acked all the TCP data associated
	 * with the data requests being unplugged */
	drbd_tcp_quickack(connection->data.socket);

	return 0;
}
4432
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004433static int receive_out_of_sync(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisner73a01a12010-10-27 14:33:00 +02004434{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004435 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004436 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004437 struct p_block_desc *p = pi->data;
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004438
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004439 peer_device = conn_peer_device(connection, pi->vnr);
4440 if (!peer_device)
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004441 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02004442 device = peer_device->device;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004443
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004444 switch (device->state.conn) {
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004445 case C_WF_SYNC_UUID:
4446 case C_WF_BITMAP_T:
4447 case C_BEHIND:
4448 break;
4449 default:
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02004450 drbd_err(device, "ASSERT FAILED cstate = %s, expected: WFSyncUUID|WFBitMapT|Behind\n",
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004451 drbd_conn_str(device->state.conn));
Lars Ellenbergf735e3632010-12-17 21:06:18 +01004452 }
4453
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02004454 drbd_set_out_of_sync(device, be64_to_cpu(p->sector), be32_to_cpu(p->blksize));
Philipp Reisner73a01a12010-10-27 14:33:00 +02004455
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004456 return 0;
Philipp Reisner73a01a12010-10-27 14:33:00 +02004457}
4458
/* One entry in the data-socket packet dispatch table (drbd_cmd_handler). */
struct data_cmd {
	int expect_payload;	/* non-zero: packet may carry payload beyond pkt_size */
	size_t pkt_size;	/* fixed sub-header size read before calling fn */
	int (*fn)(struct drbd_connection *, struct packet_info *);	/* packet handler */
};
4464
/* Dispatch table for packets arriving on the data socket, indexed by
 * packet command number.  Unknown or unhandled commands have a NULL .fn
 * and are rejected in drbdd(). */
static struct data_cmd drbd_cmd_handler[] = {
	[P_DATA]	    = { 1, sizeof(struct p_data), receive_Data },
	[P_DATA_REPLY]	    = { 1, sizeof(struct p_data), receive_DataReply },
	[P_RS_DATA_REPLY]   = { 1, sizeof(struct p_data), receive_RSDataReply } ,
	[P_BARRIER]	    = { 0, sizeof(struct p_barrier), receive_Barrier } ,
	[P_BITMAP]	    = { 1, 0, receive_bitmap } ,
	[P_COMPRESSED_BITMAP] = { 1, 0, receive_bitmap } ,
	[P_UNPLUG_REMOTE]   = { 0, 0, receive_UnplugRemote },
	[P_DATA_REQUEST]    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_SYNC_PARAM]	    = { 1, 0, receive_SyncParam },
	[P_SYNC_PARAM89]    = { 1, 0, receive_SyncParam },
	[P_PROTOCOL]	    = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_UUIDS]	    = { 0, sizeof(struct p_uuids), receive_uuids },
	[P_SIZES]	    = { 0, sizeof(struct p_sizes), receive_sizes },
	[P_STATE]	    = { 0, sizeof(struct p_state), receive_state },
	[P_STATE_CHG_REQ]   = { 0, sizeof(struct p_req_state), receive_req_state },
	[P_SYNC_UUID]	    = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid },
	[P_OV_REQUEST]	    = { 0, sizeof(struct p_block_req), receive_DataRequest },
	[P_OV_REPLY]	    = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest },
	[P_DELAY_PROBE]     = { 0, sizeof(struct p_delay_probe93), receive_skip },
	[P_OUT_OF_SYNC]     = { 0, sizeof(struct p_block_desc), receive_out_of_sync },
	[P_CONN_ST_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_conn_state },
	[P_PROTOCOL_UPDATE] = { 1, sizeof(struct p_protocol), receive_protocol },
	[P_TRIM]	    = { 0, sizeof(struct p_trim), receive_Data },
};
4492
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004493static void drbdd(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004494{
Philipp Reisner77351055b2011-02-07 17:24:26 +01004495 struct packet_info pi;
Philipp Reisner02918be2010-08-20 14:35:10 +02004496 size_t shs; /* sub header size */
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004497 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004498
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004499 while (get_t_state(&connection->receiver) == RUNNING) {
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004500 struct data_cmd *cmd;
4501
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004502 drbd_thread_current_set_cpu(&connection->receiver);
4503 if (drbd_recv_header(connection, &pi))
Philipp Reisner02918be2010-08-20 14:35:10 +02004504 goto err_out;
4505
Andreas Gruenbacherdeebe192011-03-25 00:01:04 +01004506 cmd = &drbd_cmd_handler[pi.cmd];
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004507 if (unlikely(pi.cmd >= ARRAY_SIZE(drbd_cmd_handler) || !cmd->fn)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004508 drbd_err(connection, "Unexpected data packet %s (0x%04x)",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004509 cmdname(pi.cmd), pi.cmd);
Philipp Reisner02918be2010-08-20 14:35:10 +02004510 goto err_out;
Lars Ellenberg0b33a912009-11-16 15:58:04 +01004511 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004512
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004513 shs = cmd->pkt_size;
4514 if (pi.size > shs && !cmd->expect_payload) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004515 drbd_err(connection, "No payload expected %s l:%d\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004516 cmdname(pi.cmd), pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004517 goto err_out;
4518 }
4519
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004520 if (shs) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004521 err = drbd_recv_all_warn(connection, pi.data, shs);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004522 if (err)
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004523 goto err_out;
Andreas Gruenbachere2857212011-03-25 00:57:38 +01004524 pi.size -= shs;
Lars Ellenbergc13f7e12010-10-29 23:32:01 +02004525 }
4526
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004527 err = cmd->fn(connection, &pi);
Andreas Gruenbacher4a76b162011-03-25 02:43:51 +01004528 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004529 drbd_err(connection, "error receiving %s, e: %d l: %d!\n",
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004530 cmdname(pi.cmd), err, pi.size);
Philipp Reisner02918be2010-08-20 14:35:10 +02004531 goto err_out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004532 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004533 }
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004534 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004535
Andreas Gruenbacher82bc0192011-03-17 12:10:19 +01004536 err_out:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004537 conn_request_state(connection, NS(conn, C_PROTOCOL_ERROR), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004538}
4539
/*
 * conn_disconnect() - tear down a connection after connection loss
 *
 * Stops the asender, closes the sockets, runs per-volume cleanup
 * (drbd_disconnected) for every peer device, resets epoch state, and
 * finally moves the connection to C_UNCONNECTED (or C_STANDALONE if an
 * administrative disconnect was requested).  No-op if already standalone.
 */
static void conn_disconnect(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	enum drbd_conns oc;
	int vnr;

	if (connection->cstate == C_STANDALONE)
		return;

	/* We are about to start the cleanup after connection loss.
	 * Make sure drbd_make_request knows about that.
	 * Usually we should be in some network failure state already,
	 * but just in case we are not, we fix it up here.
	 */
	conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);

	/* asender does not clean up anything. it must not interfere, either */
	drbd_thread_stop(&connection->asender);
	drbd_free_sock(connection);

	/* drbd_disconnected() may sleep, so we cannot stay inside the RCU
	 * read-side section while calling it: take a kref on the device,
	 * drop the RCU lock for the call, then re-acquire it to continue
	 * the idr walk. */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;
		kref_get(&device->kref);
		rcu_read_unlock();
		drbd_disconnected(peer_device);
		kref_put(&device->kref, drbd_destroy_device);
		rcu_read_lock();
	}
	rcu_read_unlock();

	if (!list_empty(&connection->current_epoch->list))
		drbd_err(connection, "ASSERTION FAILED: connection->current_epoch->list not empty\n");
	/* ok, no more ee's on the fly, it is safe to reset the epoch_size */
	atomic_set(&connection->current_epoch->epoch_size, 0);
	connection->send.seen_any_write_yet = false;

	drbd_info(connection, "Connection closed\n");

	/* If we were primary and the peer's disk state is unknown, try to
	 * outdate the peer (asynchronously) so it cannot be promoted with
	 * stale data. */
	if (conn_highest_role(connection) == R_PRIMARY && conn_highest_pdsk(connection) >= D_UNKNOWN)
		conn_try_outdate_peer_async(connection);

	spin_lock_irq(&connection->resource->req_lock);
	oc = connection->cstate;
	if (oc >= C_UNCONNECTED)
		_conn_request_state(connection, NS(conn, C_UNCONNECTED), CS_VERBOSE);

	spin_unlock_irq(&connection->resource->req_lock);

	/* Administrative disconnect: go all the way to standalone. */
	if (oc == C_DISCONNECTING)
		conn_request_state(connection, NS(conn, C_STANDALONE), CS_VERBOSE | CS_HARD);
}
4592
/*
 * drbd_disconnected() - per-volume cleanup after the connection was lost
 *
 * Waits for in-flight peer requests to drain, cancels resync bookkeeping,
 * flushes the sender workqueue, clears the transfer log (unless suspended)
 * and releases pages still referenced by the network stack.  Always
 * returns 0.  Called from conn_disconnect() for each peer device.
 */
static int drbd_disconnected(struct drbd_peer_device *peer_device)
{
	struct drbd_device *device = peer_device->device;
	unsigned int i;

	/* wait for current activity to cease. */
	spin_lock_irq(&device->resource->req_lock);
	_drbd_wait_ee_list_empty(device, &device->active_ee);
	_drbd_wait_ee_list_empty(device, &device->sync_ee);
	_drbd_wait_ee_list_empty(device, &device->read_ee);
	spin_unlock_irq(&device->resource->req_lock);

	/* We do not have data structures that would allow us to
	 * get the rs_pending_cnt down to 0 again.
	 *  * On C_SYNC_TARGET we do not have any data structures describing
	 *    the pending RSDataRequest's we have sent.
	 *  * On C_SYNC_SOURCE there is no data structure that tracks
	 *    the P_RS_DATA_REPLY blocks that we sent to the SyncTarget.
	 *  And no, it is not the sum of the reference counts in the
	 *  resync_LRU. The resync_LRU tracks the whole operation including
	 *  the disk-IO, while the rs_pending_cnt only tracks the blocks
	 *  on the fly. */
	drbd_rs_cancel_all(device);
	device->rs_total = 0;
	device->rs_failed = 0;
	atomic_set(&device->rs_pending_cnt, 0);
	wake_up(&device->misc_wait);

	/* stop the resync timer, then run its handler one final time
	 * synchronously so pending timer work is not simply dropped */
	del_timer_sync(&device->resync_timer);
	resync_timer_fn((unsigned long)device);

	/* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier,
	 * w_make_resync_request etc. which may still be on the worker queue
	 * to be "canceled" */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	drbd_finish_peer_reqs(device);

	/* This second workqueue flush is necessary, since drbd_finish_peer_reqs()
	   might have issued a work again. The one before drbd_finish_peer_reqs() is
	   necessary to reclaim net_ee in drbd_finish_peer_reqs(). */
	drbd_flush_workqueue(&peer_device->connection->sender_work);

	/* need to do it again, drbd_finish_peer_reqs() may have populated it
	 * again via drbd_try_clear_on_disk_bm(). */
	drbd_rs_cancel_all(device);

	/* forget everything we learned about the peer's UUIDs */
	kfree(device->p_uuid);
	device->p_uuid = NULL;

	if (!drbd_suspended(device))
		tl_clear(peer_device->connection);

	drbd_md_sync(device);

	/* serialize with bitmap writeout triggered by the state change,
	 * if any. */
	wait_event(device->misc_wait, !test_bit(BITMAP_IO, &device->flags));

	/* tcp_close and release of sendpage pages can be deferred. I don't
	 * want to use SO_LINGER, because apparently it can be deferred for
	 * more than 20 seconds (longest time I checked).
	 *
	 * Actually we don't care for exactly when the network stack does its
	 * put_page(), but release our reference on these pages right here.
	 */
	i = drbd_free_peer_reqs(device, &device->net_ee);
	if (i)
		drbd_info(device, "net_ee not empty, killed %u entries\n", i);
	i = atomic_read(&device->pp_in_use_by_net);
	if (i)
		drbd_info(device, "pp_in_use_by_net = %d, expected 0\n", i);
	i = atomic_read(&device->pp_in_use);
	if (i)
		drbd_info(device, "pp_in_use = %d, expected 0\n", i);

	D_ASSERT(device, list_empty(&device->read_ee));
	D_ASSERT(device, list_empty(&device->active_ee));
	D_ASSERT(device, list_empty(&device->sync_ee));
	D_ASSERT(device, list_empty(&device->done_ee));

	return 0;
}
4676
4677/*
4678 * We support PRO_VERSION_MIN to PRO_VERSION_MAX. The protocol version
4679 * we can agree on is stored in agreed_pro_version.
4680 *
4681 * feature flags and the reserved array should be enough room for future
4682 * enhancements of the handshake protocol, and possible plugins...
4683 *
4684 * for now, they are expected to be zero, but ignored.
4685 */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004686static int drbd_send_features(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004687{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004688 struct drbd_socket *sock;
4689 struct p_connection_features *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004690
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004691 sock = &connection->data;
4692 p = conn_prepare_command(connection, sock);
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004693 if (!p)
Andreas Gruenbachere8d17b02011-03-16 00:54:19 +01004694 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004695 memset(p, 0, sizeof(*p));
4696 p->protocol_min = cpu_to_be32(PRO_VERSION_MIN);
4697 p->protocol_max = cpu_to_be32(PRO_VERSION_MAX);
Lars Ellenberg20c68fd2014-04-28 18:43:25 +02004698 p->feature_flags = cpu_to_be32(PRO_FEATURES);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004699 return conn_send_command(connection, sock, P_CONNECTION_FEATURES, sizeof(*p), NULL, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004700}
4701
4702/*
4703 * return values:
4704 * 1 yes, we have a valid connection
4705 * 0 oops, did not work out, please try again
4706 * -1 peer talks different language,
4707 * no point in trying again, please go standalone.
4708 */
static int drbd_do_features(struct drbd_connection *connection)
{
	/* ASSERT current == connection->receiver ... */
	struct p_connection_features *p;
	const int expect = sizeof(struct p_connection_features);
	struct packet_info pi;
	int err;

	/* Return-value contract (see comment above this function):
	 * 1 = handshake ok, 0 = network trouble / retry, -1 = incompatible. */

	/* send failure: likely a transient network problem -> retry (0) */
	err = drbd_send_features(connection);
	if (err)
		return 0;

	err = drbd_recv_header(connection, &pi);
	if (err)
		return 0;

	/* Anything other than a ConnectionFeatures packet here means the
	 * peer speaks a different dialect -> give up (-1). */
	if (pi.cmd != P_CONNECTION_FEATURES) {
		drbd_err(connection, "expected ConnectionFeatures packet, received: %s (0x%04x)\n",
			 cmdname(pi.cmd), pi.cmd);
		return -1;
	}

	if (pi.size != expect) {
		drbd_err(connection, "expected ConnectionFeatures length: %u, received: %u\n",
			 expect, pi.size);
		return -1;
	}

	p = pi.data;
	err = drbd_recv_all_warn(connection, p, expect);
	if (err)
		return 0;

	/* convert the peer's fields to host byte order in place */
	p->protocol_min = be32_to_cpu(p->protocol_min);
	p->protocol_max = be32_to_cpu(p->protocol_max);
	if (p->protocol_max == 0)
		p->protocol_max = p->protocol_min;

	/* version ranges must overlap, otherwise we cannot talk at all */
	if (PRO_VERSION_MAX < p->protocol_min ||
	    PRO_VERSION_MIN > p->protocol_max)
		goto incompat;

	/* agree on the highest version and feature set both sides support */
	connection->agreed_pro_version = min_t(int, PRO_VERSION_MAX, p->protocol_max);
	connection->agreed_features = PRO_FEATURES & be32_to_cpu(p->feature_flags);

	drbd_info(connection, "Handshake successful: "
	     "Agreed network protocol version %d\n", connection->agreed_pro_version);

	drbd_info(connection, "Agreed to%ssupport TRIM on protocol level\n",
		  connection->agreed_features & FF_TRIM ? " " : " not ");

	return 1;

 incompat:
	drbd_err(connection, "incompatible DRBD dialects: "
	    "I support %d-%d, peer supports %d-%d\n",
	    PRO_VERSION_MIN, PRO_VERSION_MAX,
	    p->protocol_min, p->protocol_max);
	return -1;
}
4769
4770#if !defined(CONFIG_CRYPTO_HMAC) && !defined(CONFIG_CRYPTO_HMAC_MODULE)
/* Stub used when the kernel lacks CONFIG_CRYPTO_HMAC: shared-secret
 * authentication cannot work, so refuse the connection outright rather
 * than silently skipping the check.  Returns -1 ("auth failed, don't
 * try again"), matching the real implementation's contract.
 * Fix: error message said "was build" instead of "was built". */
static int drbd_do_auth(struct drbd_connection *connection)
{
	drbd_err(connection, "This kernel was built without CONFIG_CRYPTO_HMAC.\n");
	drbd_err(connection, "You need to disable 'cram-hmac-alg' in drbd.conf.\n");
	return -1;
}
4777#else
4778#define CHALLENGE_LEN 64
Johannes Thomab10d96c2010-01-07 16:02:50 +01004779
4780/* Return value:
4781 1 - auth succeeded,
4782 0 - failed, try again (network error),
4783 -1 - auth failed, don't try again.
4784*/
4785
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004786static int drbd_do_auth(struct drbd_connection *connection)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004787{
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004788 struct drbd_socket *sock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004789 char my_challenge[CHALLENGE_LEN]; /* 64 Bytes... */
4790 struct scatterlist sg;
4791 char *response = NULL;
4792 char *right_response = NULL;
4793 char *peers_ch = NULL;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004794 unsigned int key_len;
4795 char secret[SHARED_SECRET_MAX]; /* 64 byte */
Philipp Reisnerb411b362009-09-25 16:07:19 -07004796 unsigned int resp_size;
4797 struct hash_desc desc;
Philipp Reisner77351055b2011-02-07 17:24:26 +01004798 struct packet_info pi;
Philipp Reisner44ed1672011-04-19 17:10:19 +02004799 struct net_conf *nc;
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004800 int err, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004801
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004802 /* FIXME: Put the challenge/response into the preallocated socket buffer. */
4803
Philipp Reisner44ed1672011-04-19 17:10:19 +02004804 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004805 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02004806 key_len = strlen(nc->shared_secret);
4807 memcpy(secret, nc->shared_secret, key_len);
4808 rcu_read_unlock();
4809
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004810 desc.tfm = connection->cram_hmac_tfm;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004811 desc.flags = 0;
4812
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004813 rv = crypto_hash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004814 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004815 drbd_err(connection, "crypto_hash_setkey() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004816 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004817 goto fail;
4818 }
4819
4820 get_random_bytes(my_challenge, CHALLENGE_LEN);
4821
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004822 sock = &connection->data;
4823 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004824 rv = 0;
4825 goto fail;
4826 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004827 rv = !conn_send_command(connection, sock, P_AUTH_CHALLENGE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004828 my_challenge, CHALLENGE_LEN);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004829 if (!rv)
4830 goto fail;
4831
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004832 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004833 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004834 rv = 0;
4835 goto fail;
4836 }
4837
Philipp Reisner77351055b2011-02-07 17:24:26 +01004838 if (pi.cmd != P_AUTH_CHALLENGE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004839 drbd_err(connection, "expected AuthChallenge packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004840 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004841 rv = 0;
4842 goto fail;
4843 }
4844
Philipp Reisner77351055b2011-02-07 17:24:26 +01004845 if (pi.size > CHALLENGE_LEN * 2) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004846 drbd_err(connection, "expected AuthChallenge payload too big.\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004847 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004848 goto fail;
4849 }
4850
Philipp Reisner67cca282014-04-28 18:43:30 +02004851 if (pi.size < CHALLENGE_LEN) {
4852 drbd_err(connection, "AuthChallenge payload too small.\n");
4853 rv = -1;
4854 goto fail;
4855 }
4856
Philipp Reisner77351055b2011-02-07 17:24:26 +01004857 peers_ch = kmalloc(pi.size, GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004858 if (peers_ch == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004859 drbd_err(connection, "kmalloc of peers_ch failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004860 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004861 goto fail;
4862 }
4863
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004864 err = drbd_recv_all_warn(connection, peers_ch, pi.size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004865 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004866 rv = 0;
4867 goto fail;
4868 }
4869
Philipp Reisner67cca282014-04-28 18:43:30 +02004870 if (!memcmp(my_challenge, peers_ch, CHALLENGE_LEN)) {
4871 drbd_err(connection, "Peer presented the same challenge!\n");
4872 rv = -1;
4873 goto fail;
4874 }
4875
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004876 resp_size = crypto_hash_digestsize(connection->cram_hmac_tfm);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004877 response = kmalloc(resp_size, GFP_NOIO);
4878 if (response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004879 drbd_err(connection, "kmalloc of response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004880 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004881 goto fail;
4882 }
4883
4884 sg_init_table(&sg, 1);
Philipp Reisner77351055b2011-02-07 17:24:26 +01004885 sg_set_buf(&sg, peers_ch, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004886
4887 rv = crypto_hash_digest(&desc, &sg, sg.length, response);
4888 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004889 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004890 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004891 goto fail;
4892 }
4893
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004894 if (!conn_prepare_command(connection, sock)) {
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004895 rv = 0;
4896 goto fail;
4897 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004898 rv = !conn_send_command(connection, sock, P_AUTH_RESPONSE, 0,
Andreas Gruenbacher9f5bdc32011-03-28 14:23:08 +02004899 response, resp_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004900 if (!rv)
4901 goto fail;
4902
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004903 err = drbd_recv_header(connection, &pi);
Andreas Gruenbacher69bc7bc2011-03-16 17:31:52 +01004904 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004905 rv = 0;
4906 goto fail;
4907 }
4908
Philipp Reisner77351055b2011-02-07 17:24:26 +01004909 if (pi.cmd != P_AUTH_RESPONSE) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004910 drbd_err(connection, "expected AuthResponse packet, received: %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02004911 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004912 rv = 0;
4913 goto fail;
4914 }
4915
Philipp Reisner77351055b2011-02-07 17:24:26 +01004916 if (pi.size != resp_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004917 drbd_err(connection, "expected AuthResponse payload of wrong size\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004918 rv = 0;
4919 goto fail;
4920 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004921
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004922 err = drbd_recv_all_warn(connection, response , resp_size);
Andreas Gruenbachera5c31902011-03-24 03:28:04 +01004923 if (err) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004924 rv = 0;
4925 goto fail;
4926 }
4927
4928 right_response = kmalloc(resp_size, GFP_NOIO);
Julia Lawall2d1ee872009-12-27 22:27:11 +01004929 if (right_response == NULL) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004930 drbd_err(connection, "kmalloc of right_response failed\n");
Johannes Thomab10d96c2010-01-07 16:02:50 +01004931 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004932 goto fail;
4933 }
4934
4935 sg_set_buf(&sg, my_challenge, CHALLENGE_LEN);
4936
4937 rv = crypto_hash_digest(&desc, &sg, sg.length, right_response);
4938 if (rv) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004939 drbd_err(connection, "crypto_hash_digest() failed with %d\n", rv);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004940 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004941 goto fail;
4942 }
4943
4944 rv = !memcmp(response, right_response, resp_size);
4945
4946 if (rv)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004947 drbd_info(connection, "Peer authenticated using %d bytes HMAC\n",
Philipp Reisner44ed1672011-04-19 17:10:19 +02004948 resp_size);
Johannes Thomab10d96c2010-01-07 16:02:50 +01004949 else
4950 rv = -1;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004951
4952 fail:
4953 kfree(peers_ch);
4954 kfree(response);
4955 kfree(right_response);
4956
4957 return rv;
4958}
4959#endif
4960
Andreas Gruenbacher8fe60552011-07-22 11:04:36 +02004961int drbd_receiver(struct drbd_thread *thi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004962{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004963 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004964 int h;
4965
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004966 drbd_info(connection, "receiver (re)started\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004967
4968 do {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004969 h = conn_connect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004970 if (h == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004971 conn_disconnect(connection);
Philipp Reisner20ee6392011-01-18 15:28:59 +01004972 schedule_timeout_interruptible(HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004973 }
4974 if (h == -1) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004975 drbd_warn(connection, "Discarding network configuration.\n");
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004976 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004977 }
4978 } while (h == 0);
4979
Philipp Reisner91fd4da2011-04-20 17:47:29 +02004980 if (h > 0)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004981 drbdd(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004982
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004983 conn_disconnect(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004984
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02004985 drbd_info(connection, "receiver terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07004986 return 0;
4987}
4988
4989/* ********* acknowledge sender ******** */
4990
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004991static int got_conn_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004992{
Andreas Gruenbachere6589832011-03-30 12:54:42 +02004993 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004994 int retcode = be32_to_cpu(p->retcode);
4995
4996 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004997 set_bit(CONN_WD_ST_CHG_OKAY, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004998 } else {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02004999 set_bit(CONN_WD_ST_CHG_FAIL, &connection->flags);
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005000 drbd_err(connection, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005001 drbd_set_st_err_str(retcode), retcode);
5002 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005003 wake_up(&connection->ping_wait);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005004
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005005 return 0;
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005006}
5007
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005008static int got_RqSReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005009{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005010 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005011 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005012 struct p_req_state_reply *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005013 int retcode = be32_to_cpu(p->retcode);
5014
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005015 peer_device = conn_peer_device(connection, pi->vnr);
5016 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005017 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005018 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005019
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005020 if (test_bit(CONN_WD_ST_CHG_REQ, &connection->flags)) {
Andreas Gruenbacher0b0ba1e2011-06-27 16:23:33 +02005021 D_ASSERT(device, connection->agreed_pro_version < 100);
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005022 return got_conn_RqSReply(connection, pi);
Philipp Reisner4d0fc3f2012-01-20 13:52:27 +01005023 }
5024
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005025 if (retcode >= SS_SUCCESS) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005026 set_bit(CL_ST_CHG_SUCCESS, &device->flags);
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005027 } else {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005028 set_bit(CL_ST_CHG_FAIL, &device->flags);
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005029 drbd_err(device, "Requested state change failed by peer: %s (%d)\n",
Philipp Reisnere4f78ed2011-03-16 11:27:48 +01005030 drbd_set_st_err_str(retcode), retcode);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005031 }
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005032 wake_up(&device->state_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005033
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005034 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005035}
5036
/* P_PING: immediately answer with a ping ack. */
static int got_Ping(struct drbd_connection *connection, struct packet_info *pi)
{
	return drbd_send_ping_ack(connection);
}
5042
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005043static int got_PingAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005044{
5045 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005046 connection->meta.socket->sk->sk_rcvtimeo = connection->net_conf->ping_int*HZ;
5047 if (!test_and_set_bit(GOT_PING_ACK, &connection->flags))
5048 wake_up(&connection->ping_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005049
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005050 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005051}
5052
/*
 * P_RS_IS_IN_SYNC: the peer determined (by checksum comparison) that a
 * resync block already matches, so mark it in sync without having
 * transferred the data.  Only peers with agreed_pro_version >= 89 send
 * this (asserted below).
 */
static int got_IsInSync(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	sector_t sector = be64_to_cpu(p->sector);
	int blksize = be32_to_cpu(p->blksize);

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	D_ASSERT(device, peer_device->connection->agreed_pro_version >= 89);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* Only touch resync bookkeeping if we still hold a local disk. */
	if (get_ldev(device)) {
		drbd_rs_complete_io(device, sector);
		drbd_set_in_sync(device, sector, blksize);
		/* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */
		device->rs_same_csum += (blksize >> BM_BLOCK_SHIFT);
		put_ldev(device);
	}
	dec_rs_pending(device);
	/* blksize is in bytes; rs_sect_in counts 512-byte sectors */
	atomic_add(blksize >> 9, &device->rs_sect_in);

	return 0;
}
5082
/*
 * Look up the request identified by @id/@sector in the tree @root and
 * apply the request state transition @what to it, all under the
 * resource's req_lock.  @func is used by find_request() for reporting.
 * @missing_ok is passed through to find_request() — presumably it makes
 * a failed lookup non-fatal from a logging perspective; confirm against
 * find_request().  Returns 0 on success, -EIO if no request was found.
 */
static int
validate_req_change_req_state(struct drbd_device *device, u64 id, sector_t sector,
			      struct rb_root *root, const char *func,
			      enum drbd_req_event what, bool missing_ok)
{
	struct drbd_request *req;
	struct bio_and_error m;

	spin_lock_irq(&device->resource->req_lock);
	req = find_request(device, root, id, sector, missing_ok, func);
	if (unlikely(!req)) {
		spin_unlock_irq(&device->resource->req_lock);
		return -EIO;
	}
	__req_mod(req, what, &m);
	spin_unlock_irq(&device->resource->req_lock);

	/* __req_mod() may hand back a master bio to complete; do that
	 * outside the spinlock. */
	if (m.bio)
		complete_master_bio(device, &m);
	return 0;
}
5104
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005105static int got_BlockAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005106{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005107 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005108 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005109 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005110 sector_t sector = be64_to_cpu(p->sector);
5111 int blksize = be32_to_cpu(p->blksize);
5112 enum drbd_req_event what;
5113
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005114 peer_device = conn_peer_device(connection, pi->vnr);
5115 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005116 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005117 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005118
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005119 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005120
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005121 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005122 drbd_set_in_sync(device, sector, blksize);
5123 dec_rs_pending(device);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005124 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005125 }
Andreas Gruenbachere05e1e52011-03-25 15:16:26 +01005126 switch (pi->cmd) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07005127 case P_RS_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005128 what = WRITE_ACKED_BY_PEER_AND_SIS;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005129 break;
5130 case P_WRITE_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005131 what = WRITE_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005132 break;
5133 case P_RECV_ACK:
Andreas Gruenbacher8554df12011-01-25 15:37:43 +01005134 what = RECV_ACKED_BY_PEER;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005135 break;
Lars Ellenbergd4dabbe2012-08-01 12:33:51 +02005136 case P_SUPERSEDED:
5137 what = CONFLICT_RESOLVED;
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005138 break;
5139 case P_RETRY_WRITE:
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005140 what = POSTPONE_WRITE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005141 break;
5142 default:
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005143 BUG();
Philipp Reisnerb411b362009-09-25 16:07:19 -07005144 }
5145
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005146 return validate_req_change_req_state(device, p->block_id, sector,
5147 &device->write_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005148 what, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005149}
5150
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005151static int got_NegAck(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005152{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005153 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005154 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005155 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005156 sector_t sector = be64_to_cpu(p->sector);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005157 int size = be32_to_cpu(p->blksize);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005158 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005159
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005160 peer_device = conn_peer_device(connection, pi->vnr);
5161 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005162 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005163 device = peer_device->device;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005164
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005165 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Philipp Reisnerb411b362009-09-25 16:07:19 -07005166
Andreas Gruenbacher579b57e2011-01-13 18:40:57 +01005167 if (p->block_id == ID_SYNCER) {
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005168 dec_rs_pending(device);
5169 drbd_rs_failed_io(device, sector, size);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005170 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005171 }
Philipp Reisner2deb8332011-01-17 18:39:18 +01005172
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005173 err = validate_req_change_req_state(device, p->block_id, sector,
5174 &device->write_requests, __func__,
Philipp Reisner303d1442011-04-13 16:24:47 -07005175 NEG_ACKED, true);
Andreas Gruenbacher85997672011-04-04 13:09:15 +02005176 if (err) {
Andreas Gruenbacherc3afd8f2011-01-20 22:25:40 +01005177 /* Protocol A has no P_WRITE_ACKs, but has P_NEG_ACKs.
5178 The master bio might already be completed, therefore the
5179 request is no longer in the collision hash. */
5180 /* In Protocol B we might already have got a P_RECV_ACK
5181 but then get a P_NEG_ACK afterwards. */
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005182 drbd_set_out_of_sync(device, sector, size);
Philipp Reisner2deb8332011-01-17 18:39:18 +01005183 }
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005184 return 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005185}
5186
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005187static int got_NegDReply(struct drbd_connection *connection, struct packet_info *pi)
Philipp Reisnerb411b362009-09-25 16:07:19 -07005188{
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005189 struct drbd_peer_device *peer_device;
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005190 struct drbd_device *device;
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005191 struct p_block_ack *p = pi->data;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005192 sector_t sector = be64_to_cpu(p->sector);
5193
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005194 peer_device = conn_peer_device(connection, pi->vnr);
5195 if (!peer_device)
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005196 return -EIO;
Andreas Gruenbacher9f4fe9a2011-08-09 03:54:55 +02005197 device = peer_device->device;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005198
Andreas Gruenbacher69a22772011-08-09 00:47:13 +02005199 update_peer_seq(peer_device, be32_to_cpu(p->seq_num));
Andreas Gruenbacher7be8da02011-02-22 02:15:32 +01005200
Andreas Gruenbacherd0180172011-07-03 17:53:52 +02005201 drbd_err(device, "Got NegDReply; Sector %llus, len %u.\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07005202 (unsigned long long)sector, be32_to_cpu(p->blksize));
5203
Andreas Gruenbacherb30ab792011-07-03 13:26:43 +02005204 return validate_req_change_req_state(device, p->block_id, sector,
5205 &device->read_requests, __func__,
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005206 NEG_ACKED, false);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005207}
5208
/*
 * Negative reply to a resync/online-verify read request
 * (P_NEG_RS_DREPLY) or a cancelled one (P_RS_CANCEL).  Either way the
 * pending-resync counter is decremented; only the failure case also
 * records the range as failed I/O.
 */
static int got_NegRSDReply(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	sector_t sector;
	int size;
	struct p_block_ack *p = pi->data;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	dec_rs_pending(device);

	if (get_ldev_if_state(device, D_FAILED)) {
		drbd_rs_complete_io(device, sector);
		switch (pi->cmd) {
		case P_NEG_RS_DREPLY:
			drbd_rs_failed_io(device, sector, size);
			/* fall through: nothing else to do for the failure case */
		case P_RS_CANCEL:
			break;
		default:
			BUG();
		}
		put_ldev(device);
	}

	return 0;
}
5244
/*
 * P_BARRIER_ACK: the peer has processed all writes up to this barrier.
 * Release the corresponding transfer-log epoch, then check each volume
 * that is in Ahead mode whether it can start resyncing back.
 */
static int got_BarrierAck(struct drbd_connection *connection, struct packet_info *pi)
{
	struct p_barrier_ack *p = pi->data;
	struct drbd_peer_device *peer_device;
	int vnr;

	tl_release(connection, p->barrier, be32_to_cpu(p->set_size));

	/* peer_devices idr is walked under rcu_read_lock */
	rcu_read_lock();
	idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
		struct drbd_device *device = peer_device->device;

		/* In Ahead mode with no application I/O in flight:
		 * arm the timer that transitions to SyncSource.
		 * test_and_set_bit makes sure we arm it only once. */
		if (device->state.conn == C_AHEAD &&
		    atomic_read(&device->ap_in_flight) == 0 &&
		    !test_and_set_bit(AHEAD_TO_SYNC_SOURCE, &device->flags)) {
			device->start_resync_timer.expires = jiffies + HZ;
			add_timer(&device->start_resync_timer);
		}
	}
	rcu_read_unlock();

	return 0;
}
5268
/*
 * P_OV_RESULT: the peer reports the result of one online-verify block.
 * Track out-of-sync findings, update progress, and when the last block
 * has been answered, queue the verify-finished work (or finish inline
 * if the work item cannot be allocated).
 */
static int got_OVResult(struct drbd_connection *connection, struct packet_info *pi)
{
	struct drbd_peer_device *peer_device;
	struct drbd_device *device;
	struct p_block_ack *p = pi->data;
	struct drbd_device_work *dw;
	sector_t sector;
	int size;

	peer_device = conn_peer_device(connection, pi->vnr);
	if (!peer_device)
		return -EIO;
	device = peer_device->device;

	sector = be64_to_cpu(p->sector);
	size = be32_to_cpu(p->blksize);

	update_peer_seq(peer_device, be32_to_cpu(p->seq_num));

	/* ID_OUT_OF_SYNC marks a mismatch; otherwise close a possibly
	 * open out-of-sync report range. */
	if (be64_to_cpu(p->block_id) == ID_OUT_OF_SYNC)
		drbd_ov_out_of_sync_found(device, sector, size);
	else
		ov_out_of_sync_print(device);

	if (!get_ldev(device))
		return 0;

	drbd_rs_complete_io(device, sector);
	dec_rs_pending(device);

	--device->ov_left;

	/* let's advance progress step marks only for every other megabyte */
	if ((device->ov_left & 0x200) == 0x200)
		drbd_advance_rs_marks(device, device->ov_left);

	if (device->ov_left == 0) {
		/* verify finished: defer the wrap-up to the worker */
		dw = kmalloc(sizeof(*dw), GFP_NOIO);
		if (dw) {
			dw->w.cb = w_ov_finished;
			dw->device = device;
			drbd_queue_work(&peer_device->connection->sender_work, &dw->w);
		} else {
			/* allocation failed: finish synchronously instead */
			drbd_err(device, "kmalloc(dw) failed.");
			ov_out_of_sync_print(device);
			drbd_resync_finished(device);
		}
	}
	put_ldev(device);
	return 0;
}
5320
/* Handler for packets we deliberately ignore (e.g. P_DELAY_PROBE). */
static int got_skip(struct drbd_connection *connection, struct packet_info *pi)
{
	return 0;
}
5325
/*
 * Drain the done_ee lists of all volumes on this connection.
 * Loops until no volume has completed peer requests left; returns 0 on
 * success, 1 if drbd_finish_peer_reqs() failed for some volume.
 */
static int connection_finish_peer_reqs(struct drbd_connection *connection)
{
	struct drbd_peer_device *peer_device;
	int vnr, not_empty = 0;

	do {
		/* SIGNAL_ASENDER is cleared while we work, and re-set below
		 * before we re-check the done_ee lists. */
		clear_bit(SIGNAL_ASENDER, &connection->flags);
		flush_signals(current);

		rcu_read_lock();
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			/* take a reference so the device survives while we
			 * drop the rcu read lock for the (sleeping) call */
			kref_get(&device->kref);
			rcu_read_unlock();
			if (drbd_finish_peer_reqs(device)) {
				kref_put(&device->kref, drbd_destroy_device);
				return 1;
			}
			kref_put(&device->kref, drbd_destroy_device);
			rcu_read_lock();
		}
		set_bit(SIGNAL_ASENDER, &connection->flags);

		/* re-check under req_lock whether anything completed while
		 * we were processing; if so, go around again */
		spin_lock_irq(&connection->resource->req_lock);
		idr_for_each_entry(&connection->peer_devices, peer_device, vnr) {
			struct drbd_device *device = peer_device->device;
			not_empty = !list_empty(&device->done_ee);
			if (not_empty)
				break;
		}
		spin_unlock_irq(&connection->resource->req_lock);
		rcu_read_unlock();
	} while (not_empty);

	return 0;
}
5362
/* Dispatch table entry for the asender thread: expected payload size
 * (beyond the header) and the handler for one packet type. */
struct asender_cmd {
	size_t pkt_size;
	int (*fn)(struct drbd_connection *connection, struct packet_info *);
};
5367
/* Packet-type -> {payload size, handler} dispatch table for the asender
 * thread.  Entries not listed here are rejected by the caller. */
static struct asender_cmd asender_tbl[] = {
	[P_PING]	    = { 0, got_Ping },
	[P_PING_ACK]	    = { 0, got_PingAck },
	[P_RECV_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_WRITE_ACK]	    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_RS_WRITE_ACK]    = { sizeof(struct p_block_ack), got_BlockAck },
	[P_SUPERSEDED]   = { sizeof(struct p_block_ack), got_BlockAck },
	[P_NEG_ACK]	    = { sizeof(struct p_block_ack), got_NegAck },
	[P_NEG_DREPLY]	    = { sizeof(struct p_block_ack), got_NegDReply },
	[P_NEG_RS_DREPLY]   = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_OV_RESULT]	    = { sizeof(struct p_block_ack), got_OVResult },
	[P_BARRIER_ACK]	    = { sizeof(struct p_barrier_ack), got_BarrierAck },
	[P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply },
	[P_RS_IS_IN_SYNC]   = { sizeof(struct p_block_ack), got_IsInSync },
	[P_DELAY_PROBE]     = { sizeof(struct p_delay_probe93), got_skip },
	[P_RS_CANCEL]       = { sizeof(struct p_block_ack), got_NegRSDReply },
	[P_CONN_ST_CHG_REPLY]={ sizeof(struct p_req_state_reply), got_conn_RqSReply },
	[P_RETRY_WRITE]	    = { sizeof(struct p_block_ack), got_BlockAck },
};
Philipp Reisnerb411b362009-09-25 16:07:19 -07005387
5388int drbd_asender(struct drbd_thread *thi)
5389{
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005390 struct drbd_connection *connection = thi->connection;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005391 struct asender_cmd *cmd = NULL;
Philipp Reisner77351055b2011-02-07 17:24:26 +01005392 struct packet_info pi;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005393 int rv;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005394 void *buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005395 int received = 0;
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005396 unsigned int header_size = drbd_header_size(connection);
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005397 int expect = header_size;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005398 bool ping_timeout_active = false;
5399 struct net_conf *nc;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005400 int ping_timeo, tcp_cork, ping_int;
Philipp Reisner3990e042013-03-27 14:08:48 +01005401 struct sched_param param = { .sched_priority = 2 };
Philipp Reisnerb411b362009-09-25 16:07:19 -07005402
Philipp Reisner3990e042013-03-27 14:08:48 +01005403 rv = sched_setscheduler(current, SCHED_RR, &param);
5404 if (rv < 0)
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005405 drbd_err(connection, "drbd_asender: ERROR set priority, ret=%d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005406
Andreas Gruenbachere77a0a52011-01-25 15:43:39 +01005407 while (get_t_state(thi) == RUNNING) {
Philipp Reisner80822282011-02-08 12:46:30 +01005408 drbd_thread_current_set_cpu(thi);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005409
5410 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005411 nc = rcu_dereference(connection->net_conf);
Philipp Reisner44ed1672011-04-19 17:10:19 +02005412 ping_timeo = nc->ping_timeo;
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005413 tcp_cork = nc->tcp_cork;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005414 ping_int = nc->ping_int;
5415 rcu_read_unlock();
5416
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005417 if (test_and_clear_bit(SEND_PING, &connection->flags)) {
5418 if (drbd_send_ping(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005419 drbd_err(connection, "drbd_send_ping has failed\n");
Andreas Gruenbacher841ce242010-12-15 19:31:20 +01005420 goto reconnect;
5421 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005422 connection->meta.socket->sk->sk_rcvtimeo = ping_timeo * HZ / 10;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005423 ping_timeout_active = true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005424 }
5425
Philipp Reisner32862ec2011-02-08 16:41:01 +01005426 /* TODO: conditionally cork; it may hurt latency if we cork without
5427 much to send */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005428 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005429 drbd_tcp_cork(connection->meta.socket);
5430 if (connection_finish_peer_reqs(connection)) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005431 drbd_err(connection, "connection_finish_peer_reqs() failed\n");
Philipp Reisner32862ec2011-02-08 16:41:01 +01005432 goto reconnect;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005433 }
5434 /* but unconditionally uncork unless disabled */
Andreas Gruenbacherbb77d342011-05-04 15:25:35 +02005435 if (tcp_cork)
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005436 drbd_tcp_uncork(connection->meta.socket);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005437
5438 /* short circuit, recv_msg would return EINTR anyways. */
5439 if (signal_pending(current))
5440 continue;
5441
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005442 rv = drbd_recv_short(connection->meta.socket, buf, expect-received, 0);
5443 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005444
5445 flush_signals(current);
5446
5447 /* Note:
5448 * -EINTR (on meta) we got a signal
5449 * -EAGAIN (on meta) rcvtimeo expired
5450 * -ECONNRESET other side closed the connection
5451 * -ERESTARTSYS (on data) we got a signal
5452 * rv < 0 other than above: unexpected error!
5453 * rv == expected: full header or command
5454 * rv < expected: "woken" by signal during receive
5455 * rv == 0 : "connection shut down by peer"
5456 */
5457 if (likely(rv > 0)) {
5458 received += rv;
5459 buf += rv;
5460 } else if (rv == 0) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005461 if (test_bit(DISCONNECT_SENT, &connection->flags)) {
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005462 long t;
5463 rcu_read_lock();
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005464 t = rcu_dereference(connection->net_conf)->ping_timeo * HZ/10;
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005465 rcu_read_unlock();
5466
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005467 t = wait_event_timeout(connection->ping_wait,
5468 connection->cstate < C_WF_REPORT_PARAMS,
Philipp Reisnerb66623e2012-08-08 21:19:09 +02005469 t);
Philipp Reisner599377a2012-08-17 14:50:22 +02005470 if (t)
5471 break;
5472 }
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005473 drbd_err(connection, "meta connection shut down by peer.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005474 goto reconnect;
5475 } else if (rv == -EAGAIN) {
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005476 /* If the data socket received something meanwhile,
5477 * that is good enough: peer is still alive. */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005478 if (time_after(connection->last_received,
5479 jiffies - connection->meta.socket->sk->sk_rcvtimeo))
Lars Ellenbergcb6518c2011-06-20 14:44:45 +02005480 continue;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005481 if (ping_timeout_active) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005482 drbd_err(connection, "PingAck did not arrive in time.\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005483 goto reconnect;
5484 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005485 set_bit(SEND_PING, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005486 continue;
5487 } else if (rv == -EINTR) {
5488 continue;
5489 } else {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005490 drbd_err(connection, "sock_recvmsg returned %d\n", rv);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005491 goto reconnect;
5492 }
5493
5494 if (received == expect && cmd == NULL) {
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005495 if (decode_header(connection, connection->meta.rbuf, &pi))
Philipp Reisnerb411b362009-09-25 16:07:19 -07005496 goto reconnect;
Andreas Gruenbacher7201b972011-03-14 18:23:00 +01005497 cmd = &asender_tbl[pi.cmd];
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005498 if (pi.cmd >= ARRAY_SIZE(asender_tbl) || !cmd->fn) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005499 drbd_err(connection, "Unexpected meta packet %s (0x%04x)\n",
Andreas Gruenbacher2fcb8f32011-07-03 11:41:08 +02005500 cmdname(pi.cmd), pi.cmd);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005501 goto disconnect;
5502 }
Andreas Gruenbachere6589832011-03-30 12:54:42 +02005503 expect = header_size + cmd->pkt_size;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005504 if (pi.size != expect - header_size) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005505 drbd_err(connection, "Wrong packet size on meta (c: %d, l: %d)\n",
Philipp Reisner77351055b2011-02-07 17:24:26 +01005506 pi.cmd, pi.size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005507 goto reconnect;
Philipp Reisner257d0af2011-01-26 12:15:29 +01005508 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005509 }
5510 if (received == expect) {
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005511 bool err;
Philipp Reisnera4fbda82011-03-16 11:13:17 +01005512
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005513 err = cmd->fn(connection, &pi);
Andreas Gruenbacher2735a592011-04-04 15:30:24 +02005514 if (err) {
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005515 drbd_err(connection, "%pf failed\n", cmd->fn);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005516 goto reconnect;
Andreas Gruenbacher1952e912011-03-25 15:37:43 +01005517 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005518
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005519 connection->last_received = jiffies;
Lars Ellenbergf36af182011-03-09 22:44:55 +01005520
Philipp Reisner44ed1672011-04-19 17:10:19 +02005521 if (cmd == &asender_tbl[P_PING_ACK]) {
5522 /* restore idle timeout */
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005523 connection->meta.socket->sk->sk_rcvtimeo = ping_int * HZ;
Philipp Reisner44ed1672011-04-19 17:10:19 +02005524 ping_timeout_active = false;
5525 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07005526
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005527 buf = connection->meta.rbuf;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005528 received = 0;
Andreas Gruenbacher52b061a2011-03-30 11:38:49 +02005529 expect = header_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07005530 cmd = NULL;
5531 }
5532 }
5533
5534 if (0) {
5535reconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005536 conn_request_state(connection, NS(conn, C_NETWORK_FAILURE), CS_HARD);
5537 conn_md_sync(connection);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005538 }
5539 if (0) {
5540disconnect:
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005541 conn_request_state(connection, NS(conn, C_DISCONNECTING), CS_HARD);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005542 }
Andreas Gruenbacherbde89a92011-05-30 16:32:41 +02005543 clear_bit(SIGNAL_ASENDER, &connection->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07005544
Andreas Gruenbacher1ec861e2011-07-06 11:01:44 +02005545 drbd_info(connection, "asender terminated\n");
Philipp Reisnerb411b362009-09-25 16:07:19 -07005546
5547 return 0;
5548}