/*
   drbd.c

   This file is part of DRBD by Philipp Reisner and Lars Ellenberg.

   Copyright (C) 2001-2008, LINBIT Information Technologies GmbH.
   Copyright (C) 1999-2008, Philipp Reisner <philipp.reisner@linbit.com>.
   Copyright (C) 2002-2008, Lars Ellenberg <lars.ellenberg@linbit.com>.

   Thanks to Carter Burden, Bart Grantham and Gennadiy Nerubayev
   from Logicworks, Inc. for making SDP replication support possible.

   drbd is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   drbd is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with drbd; see the file COPYING.  If not, write to
   the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.

 */

#include <linux/module.h>
#include <linux/drbd.h>
#include <asm/uaccess.h>
#include <asm/types.h>
#include <net/sock.h>
#include <linux/ctype.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/reboot.h>
#include <linux/notifier.h>
#include <linux/kthread.h>

#define __KERNEL_SYSCALLS__
#include <linux/unistd.h>
#include <linux/vmalloc.h>

#include <linux/drbd_limits.h>
#include "drbd_int.h"
#include "drbd_req.h" /* only for _req_mod in tl_release and tl_clear */

#include "drbd_vli.h"

struct after_state_chg_work {
	struct drbd_work w;
	union drbd_state os;
	union drbd_state ns;
	enum chg_state_flags flags;
	struct completion *done;
};

static DEFINE_MUTEX(drbd_main_mutex);
int drbdd_init(struct drbd_thread *);
int drbd_worker(struct drbd_thread *);
int drbd_asender(struct drbd_thread *);

int drbd_init(void);
static int drbd_open(struct block_device *bdev, fmode_t mode);
static int drbd_release(struct gendisk *gd, fmode_t mode);
static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags);
static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static void md_sync_timer_fn(unsigned long data);
static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);

MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
	      "Lars Ellenberg <lars@linbit.com>");
MODULE_DESCRIPTION("drbd - Distributed Replicated Block Device v" REL_VERSION);
MODULE_VERSION(REL_VERSION);
MODULE_LICENSE("GPL");
MODULE_PARM_DESC(minor_count, "Maximum number of drbd devices ("
		 __stringify(DRBD_MINOR_COUNT_MIN) "-" __stringify(DRBD_MINOR_COUNT_MAX) ")");
MODULE_ALIAS_BLOCKDEV_MAJOR(DRBD_MAJOR);

#include <linux/moduleparam.h>
/* allow_open_on_secondary */
MODULE_PARM_DESC(allow_oos, "DONT USE!");
/* thanks to these macros, if compiled into the kernel (not-module),
 * this becomes the boot parameter drbd.minor_count */
module_param(minor_count, uint, 0444);
module_param(disable_sendpage, bool, 0644);
module_param(allow_oos, bool, 0);
module_param(cn_idx, uint, 0444);
module_param(proc_details, int, 0644);

#ifdef CONFIG_DRBD_FAULT_INJECTION
int enable_faults;
int fault_rate;
static int fault_count;
int fault_devs;
/* bitmap of enabled faults */
module_param(enable_faults, int, 0664);
/* fault rate % value - applies to all enabled faults */
module_param(fault_rate, int, 0664);
/* count of faults inserted */
module_param(fault_count, int, 0664);
/* bitmap of devices to insert faults on */
module_param(fault_devs, int, 0644);
#endif

/* module parameter, defined */
unsigned int minor_count = DRBD_MINOR_COUNT_DEF;
bool disable_sendpage;
bool allow_oos;
unsigned int cn_idx = CN_IDX_DRBD;
int proc_details;	/* Detail level in proc drbd */

/* Module parameter for setting the user mode helper program
 * to run. Default is /sbin/drbdadm */
char usermode_helper[80] = "/sbin/drbdadm";

module_param_string(usermode_helper, usermode_helper, sizeof(usermode_helper), 0644);

/* in 2.6.x, our device mapping and config info contains our virtual gendisks
 * as member "struct gendisk *vdisk;"
 */
struct drbd_conf **minor_table;

struct kmem_cache *drbd_request_cache;
struct kmem_cache *drbd_ee_cache;	/* epoch entries */
struct kmem_cache *drbd_bm_ext_cache;	/* bitmap extents */
struct kmem_cache *drbd_al_ext_cache;	/* activity log extents */
mempool_t *drbd_request_mempool;
mempool_t *drbd_ee_mempool;

/* I do not use a standard mempool, because:
   1) I want to hand out the pre-allocated objects first.
   2) I want to be able to interrupt sleeping allocation with a signal.
   Note: This is a single linked list, the next pointer is the private
	 member of struct page.
 */
struct page *drbd_pp_pool;
spinlock_t drbd_pp_lock;
int drbd_pp_vacant;
wait_queue_head_t drbd_pp_wait;

DEFINE_RATELIMIT_STATE(drbd_ratelimit_state, 5 * HZ, 5);

static const struct block_device_operations drbd_ops = {
	.owner =   THIS_MODULE,
	.open =    drbd_open,
	.release = drbd_release,
};

#define ARRY_SIZE(A) (sizeof(A)/sizeof(A[0]))

#ifdef __CHECKER__
/* When checking with sparse, and this is an inline function, sparse will
   give tons of false positives. When this is a real function, sparse works.
 */
int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
{
	int io_allowed;

	atomic_inc(&mdev->local_cnt);
	io_allowed = (mdev->state.disk >= mins);
	if (!io_allowed) {
		if (atomic_dec_and_test(&mdev->local_cnt))
			wake_up(&mdev->misc_wait);
	}
	return io_allowed;
}

#endif

/**
 * DOC: The transfer log
 *
 * The transfer log is a single linked list of &struct drbd_tl_epoch objects.
 * mdev->newest_tle points to the head, mdev->oldest_tle points to the tail
 * of the list. There is always at least one &struct drbd_tl_epoch object.
 *
 * Each &struct drbd_tl_epoch has a circular double linked list of requests
 * attached.
 */
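/* Rough sketch of the structure described above (epochs linked via ->next,
 * each carrying its own list of requests):
 *
 *   mdev->oldest_tle -> epoch -> epoch -> ... -> epoch <- mdev->newest_tle
 *                         |        |               |
 *                      requests requests        requests
 *
 * tl_release() retires the epoch at oldest_tle on a barrier ack;
 * _tl_add_barrier() appends a new epoch at newest_tle.
 */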
static int tl_init(struct drbd_conf *mdev)
{
	struct drbd_tl_epoch *b;

	/* during device minor initialization, we may well use GFP_KERNEL */
	b = kmalloc(sizeof(struct drbd_tl_epoch), GFP_KERNEL);
	if (!b)
		return 0;
	INIT_LIST_HEAD(&b->requests);
	INIT_LIST_HEAD(&b->w.list);
	b->next = NULL;
	b->br_number = 4711;
	b->n_writes = 0;
	b->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */

	mdev->oldest_tle = b;
	mdev->newest_tle = b;
	INIT_LIST_HEAD(&mdev->out_of_sequence_requests);
	INIT_LIST_HEAD(&mdev->barrier_acked_requests);

	mdev->tl_hash = NULL;
	mdev->tl_hash_s = 0;

	return 1;
}

static void tl_cleanup(struct drbd_conf *mdev)
{
	D_ASSERT(mdev->oldest_tle == mdev->newest_tle);
	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));
	kfree(mdev->oldest_tle);
	mdev->oldest_tle = NULL;
	kfree(mdev->unused_spare_tle);
	mdev->unused_spare_tle = NULL;
	kfree(mdev->tl_hash);
	mdev->tl_hash = NULL;
	mdev->tl_hash_s = 0;
}

/**
 * _tl_add_barrier() - Adds a barrier to the transfer log
 * @mdev:	DRBD device.
 * @new:	Barrier to be added before the current head of the TL.
 *
 * The caller must hold the req_lock.
 */
void _tl_add_barrier(struct drbd_conf *mdev, struct drbd_tl_epoch *new)
{
	struct drbd_tl_epoch *newest_before;

	INIT_LIST_HEAD(&new->requests);
	INIT_LIST_HEAD(&new->w.list);
	new->w.cb = NULL; /* if this is != NULL, we need to dec_ap_pending in tl_clear */
	new->next = NULL;
	new->n_writes = 0;

	newest_before = mdev->newest_tle;
	/* never send a barrier number == 0, because that is special-cased
	 * when using TCQ for our write ordering code */
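	/* "a ?: b" is the GCC conditional-with-omitted-operand extension:
	 * use (newest_before->br_number + 1) unless it wrapped to 0, then use 1 */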
	new->br_number = (newest_before->br_number+1) ?: 1;
	if (mdev->newest_tle != new) {
		mdev->newest_tle->next = new;
		mdev->newest_tle = new;
	}
}

/**
 * tl_release() - Free or recycle the oldest &struct drbd_tl_epoch object of the TL
 * @mdev:	DRBD device.
 * @barrier_nr:	Expected identifier of the DRBD write barrier packet.
 * @set_size:	Expected number of requests before that barrier.
 *
 * In case the passed barrier_nr or set_size does not match the oldest
 * &struct drbd_tl_epoch objects this function will cause a termination
 * of the connection.
 */
void tl_release(struct drbd_conf *mdev, unsigned int barrier_nr,
		unsigned int set_size)
{
	struct drbd_tl_epoch *b, *nob; /* next old barrier */
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&mdev->req_lock);

	b = mdev->oldest_tle;

	/* first some paranoia code */
	if (b == NULL) {
		dev_err(DEV, "BAD! BarrierAck #%u received, but no epoch in tl!?\n",
			barrier_nr);
		goto bail;
	}
	if (b->br_number != barrier_nr) {
		dev_err(DEV, "BAD! BarrierAck #%u received, expected #%u!\n",
			barrier_nr, b->br_number);
		goto bail;
	}
	if (b->n_writes != set_size) {
		dev_err(DEV, "BAD! BarrierAck #%u received with n_writes=%u, expected n_writes=%u!\n",
			barrier_nr, set_size, b->n_writes);
		goto bail;
	}

	/* Clean up list of requests processed during current epoch */
	list_for_each_safe(le, tle, &b->requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		_req_mod(r, barrier_acked);
	}
	/* There could be requests on the list waiting for completion
	   of the write to the local disk. To avoid corruptions of
	   slab's data structures we have to remove the list's head.

	   Also there could have been a barrier ack out of sequence, overtaking
	   the write acks - which would be a bug and violating write ordering.
	   To not deadlock in case we lose connection while such requests are
	   still pending, we need some way to find them for the
	   _req_mod(connection_lost_while_pending).

	   These have been list_move'd to the out_of_sequence_requests list in
	   _req_mod(, barrier_acked) above.
	   */
	list_splice_init(&b->requests, &mdev->barrier_acked_requests);

	nob = b->next;
	if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) {
		_tl_add_barrier(mdev, b);
		if (nob)
			mdev->oldest_tle = nob;
		/* if nob == NULL b was the only barrier, and becomes the new
		   barrier. Therefore mdev->oldest_tle points already to b */
	} else {
		D_ASSERT(nob != NULL);
		mdev->oldest_tle = nob;
		kfree(b);
	}

	spin_unlock_irq(&mdev->req_lock);
	dec_ap_pending(mdev);

	return;

bail:
	spin_unlock_irq(&mdev->req_lock);
	drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR));
}


/**
 * _tl_restart() - Walks the transfer log, and applies an action to all requests
 * @mdev:	DRBD device.
 * @what:	The action/event to perform with all request objects
 *
 * @what might be one of connection_lost_while_pending, resend, fail_frozen_disk_io,
 * restart_frozen_disk_io.
 */
static void _tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
{
	struct drbd_tl_epoch *b, *tmp, **pn;
	struct list_head *le, *tle, carry_reads;
	struct drbd_request *req;
	int rv, n_writes, n_reads;

	b = mdev->oldest_tle;
	pn = &mdev->oldest_tle;
	while (b) {
		n_writes = 0;
		n_reads = 0;
		INIT_LIST_HEAD(&carry_reads);
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			rv = _req_mod(req, what);

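			/* _req_mod() reports in a bit mask whether this request
			 * counted as a write and/or as a read */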
			n_writes += (rv & MR_WRITE) >> MR_WRITE_SHIFT;
			n_reads  += (rv & MR_READ) >> MR_READ_SHIFT;
		}
		tmp = b->next;

		if (n_writes) {
			if (what == resend) {
				b->n_writes = n_writes;
				if (b->w.cb == NULL) {
					b->w.cb = w_send_barrier;
					inc_ap_pending(mdev);
					set_bit(CREATE_BARRIER, &mdev->flags);
				}

				drbd_queue_work(&mdev->data.work, &b->w);
			}
			pn = &b->next;
		} else {
			if (n_reads)
				list_add(&carry_reads, &b->requests);
			/* there could still be requests on that ring list,
			 * in case local io is still pending */
			list_del(&b->requests);

			/* dec_ap_pending corresponding to queue_barrier.
			 * the newest barrier may not have been queued yet,
			 * in which case w.cb is still NULL. */
			if (b->w.cb != NULL)
				dec_ap_pending(mdev);

			if (b == mdev->newest_tle) {
				/* recycle, but reinit! */
				D_ASSERT(tmp == NULL);
				INIT_LIST_HEAD(&b->requests);
				list_splice(&carry_reads, &b->requests);
				INIT_LIST_HEAD(&b->w.list);
				b->w.cb = NULL;
				b->br_number = net_random();
				b->n_writes = 0;

				*pn = b;
				break;
			}
			*pn = tmp;
			kfree(b);
		}
		b = tmp;
		list_splice(&carry_reads, &b->requests);
	}

	/* Actions operating on the disk state, also want to work on
	   requests that got barrier acked. */
	switch (what) {
	case fail_frozen_disk_io:
	case restart_frozen_disk_io:
		list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			_req_mod(req, what);
		}

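		/* intentionally falls through to the break below */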
	case connection_lost_while_pending:
	case resend:
		break;
	default:
		dev_err(DEV, "what = %d in _tl_restart()\n", what);
	}
}


/**
 * tl_clear() - Clears all requests and &struct drbd_tl_epoch objects out of the TL
 * @mdev:	DRBD device.
 *
 * This is called after the connection to the peer was lost. The storage covered
 * by the requests on the transfer log gets marked as out of sync. Called from the
 * receiver thread and the worker thread.
 */
void tl_clear(struct drbd_conf *mdev)
{
	struct list_head *le, *tle;
	struct drbd_request *r;

	spin_lock_irq(&mdev->req_lock);

	_tl_restart(mdev, connection_lost_while_pending);

	/* we expect this list to be empty. */
	D_ASSERT(list_empty(&mdev->out_of_sequence_requests));

	/* but just in case, clean it up anyways! */
	list_for_each_safe(le, tle, &mdev->out_of_sequence_requests) {
		r = list_entry(le, struct drbd_request, tl_requests);
		/* It would be nice to complete outside of spinlock.
		 * But this is easier for now. */
		_req_mod(r, connection_lost_while_pending);
	}

	/* ensure bit indicating barrier is required is clear */
	clear_bit(CREATE_BARRIER, &mdev->flags);

	memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *));

	spin_unlock_irq(&mdev->req_lock);
}

void tl_restart(struct drbd_conf *mdev, enum drbd_req_event what)
{
	spin_lock_irq(&mdev->req_lock);
	_tl_restart(mdev, what);
	spin_unlock_irq(&mdev->req_lock);
}

/**
 * tl_abort_disk_io() - Abort disk I/O for all requests for a certain mdev in the TL
 * @mdev:	DRBD device.
 */
void tl_abort_disk_io(struct drbd_conf *mdev)
{
	struct drbd_tl_epoch *b;
	struct list_head *le, *tle;
	struct drbd_request *req;

	spin_lock_irq(&mdev->req_lock);
	b = mdev->oldest_tle;
	while (b) {
		list_for_each_safe(le, tle, &b->requests) {
			req = list_entry(le, struct drbd_request, tl_requests);
			if (!(req->rq_state & RQ_LOCAL_PENDING))
				continue;
			_req_mod(req, abort_disk_io);
		}
		b = b->next;
	}

	list_for_each_safe(le, tle, &mdev->barrier_acked_requests) {
		req = list_entry(le, struct drbd_request, tl_requests);
		if (!(req->rq_state & RQ_LOCAL_PENDING))
			continue;
		_req_mod(req, abort_disk_io);
	}

	spin_unlock_irq(&mdev->req_lock);
}

/**
 * cl_wide_st_chg() - true if the state change is a cluster wide one
 * @mdev:	DRBD device.
 * @os:		old (current) state.
 * @ns:		new (wanted) state.
 */
static int cl_wide_st_chg(struct drbd_conf *mdev,
			  union drbd_state os, union drbd_state ns)
{
	return (os.conn >= C_CONNECTED && ns.conn >= C_CONNECTED &&
		 ((os.role != R_PRIMARY && ns.role == R_PRIMARY) ||
		  (os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
		  (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S) ||
		  (os.disk != D_FAILED && ns.disk == D_FAILED))) ||
		(os.conn >= C_CONNECTED && ns.conn == C_DISCONNECTING) ||
		(os.conn == C_CONNECTED && ns.conn == C_VERIFY_S);
}

enum drbd_state_rv
drbd_change_state(struct drbd_conf *mdev, enum chg_state_flags f,
		  union drbd_state mask, union drbd_state val)
{
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	spin_lock_irqsave(&mdev->req_lock, flags);
	os = mdev->state;
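	/* apply the requested change: clear the bits selected by mask, then set them from val */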
	ns.i = (os.i & ~mask.i) | val.i;
	rv = _drbd_set_state(mdev, ns, f, NULL);
	ns = mdev->state;
	spin_unlock_irqrestore(&mdev->req_lock, flags);

	return rv;
}

/**
 * drbd_force_state() - Impose a change which happens outside our control on our state
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
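 *
 * For example, tl_release() forces the connection state to C_PROTOCOL_ERROR
 * on a bad barrier ack via drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)).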
 */
void drbd_force_state(struct drbd_conf *mdev,
	union drbd_state mask, union drbd_state val)
{
	drbd_change_state(mdev, CS_HARD, mask, val);
}

static enum drbd_state_rv is_valid_state(struct drbd_conf *, union drbd_state);
static enum drbd_state_rv is_valid_state_transition(struct drbd_conf *,
						    union drbd_state,
						    union drbd_state);
enum sanitize_state_warnings {
	NO_WARNING,
	ABORTED_ONLINE_VERIFY,
	ABORTED_RESYNC,
	CONNECTION_LOST_NEGOTIATING,
	IMPLICITLY_UPGRADED_DISK,
	IMPLICITLY_UPGRADED_PDSK,
};
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn);
int drbd_send_state_req(struct drbd_conf *,
			union drbd_state, union drbd_state);

static enum drbd_state_rv
_req_st_cond(struct drbd_conf *mdev, union drbd_state mask,
	     union drbd_state val)
{
	union drbd_state os, ns;
	unsigned long flags;
	enum drbd_state_rv rv;

	if (test_and_clear_bit(CL_ST_CHG_SUCCESS, &mdev->flags))
		return SS_CW_SUCCESS;

	if (test_and_clear_bit(CL_ST_CHG_FAIL, &mdev->flags))
		return SS_CW_FAILED_BY_PEER;

	rv = 0;
	spin_lock_irqsave(&mdev->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;
	ns = sanitize_state(mdev, os, ns, NULL);

	if (!cl_wide_st_chg(mdev, os, ns))
		rv = SS_CW_NO_NEED;
	if (!rv) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS) {
			rv = is_valid_state_transition(mdev, ns, os);
			if (rv == SS_SUCCESS)
				rv = SS_UNKNOWN_ERROR; /* cont waiting, otherwise fail. */
		}
	}
	spin_unlock_irqrestore(&mdev->req_lock, flags);

	return rv;
}

/**
 * drbd_req_state() - Perform a possibly cluster-wide state change
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Should not be called directly, use drbd_request_state() or
 * _drbd_request_state().
 */
static enum drbd_state_rv
drbd_req_state(struct drbd_conf *mdev, union drbd_state mask,
	       union drbd_state val, enum chg_state_flags f)
{
	struct completion done;
	unsigned long flags;
	union drbd_state os, ns;
	enum drbd_state_rv rv;

	init_completion(&done);

	if (f & CS_SERIALIZE)
		mutex_lock(&mdev->state_mutex);

	spin_lock_irqsave(&mdev->req_lock, flags);
	os = mdev->state;
	ns.i = (os.i & ~mask.i) | val.i;
	ns = sanitize_state(mdev, os, ns, NULL);

	if (cl_wide_st_chg(mdev, os, ns)) {
		rv = is_valid_state(mdev, ns);
		if (rv == SS_SUCCESS)
			rv = is_valid_state_transition(mdev, ns, os);
		spin_unlock_irqrestore(&mdev->req_lock, flags);

		if (rv < SS_SUCCESS) {
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

		drbd_state_lock(mdev);
		if (!drbd_send_state_req(mdev, mask, val)) {
			drbd_state_unlock(mdev);
			rv = SS_CW_FAILED_BY_PEER;
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}

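		/* _req_st_cond() returns SS_UNKNOWN_ERROR (== 0) while the peer's
		 * answer is still outstanding, which keeps this wait_event() waiting */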
		wait_event(mdev->state_wait,
			(rv = _req_st_cond(mdev, mask, val)));

		if (rv < SS_SUCCESS) {
			drbd_state_unlock(mdev);
			if (f & CS_VERBOSE)
				print_st_err(mdev, os, ns, rv);
			goto abort;
		}
		spin_lock_irqsave(&mdev->req_lock, flags);
		os = mdev->state;
		ns.i = (os.i & ~mask.i) | val.i;
		rv = _drbd_set_state(mdev, ns, f, &done);
		drbd_state_unlock(mdev);
	} else {
		rv = _drbd_set_state(mdev, ns, f, &done);
	}

	spin_unlock_irqrestore(&mdev->req_lock, flags);

	if (f & CS_WAIT_COMPLETE && rv == SS_SUCCESS) {
		D_ASSERT(current != mdev->worker.task);
		wait_for_completion(&done);
	}

abort:
	if (f & CS_SERIALIZE)
		mutex_unlock(&mdev->state_mutex);

	return rv;
}

/**
 * _drbd_request_state() - Request a state change (with flags)
 * @mdev:	DRBD device.
 * @mask:	mask of state bits to change.
 * @val:	value of new state bits.
 * @f:		flags
 *
 * Cousin of drbd_request_state(), useful with the CS_WAIT_COMPLETE
 * flag, or when logging of failed state change requests is not desired.
 */
enum drbd_state_rv
_drbd_request_state(struct drbd_conf *mdev, union drbd_state mask,
		    union drbd_state val, enum chg_state_flags f)
{
	enum drbd_state_rv rv;

	wait_event(mdev->state_wait,
		   (rv = drbd_req_state(mdev, mask, val, f)) != SS_IN_TRANSIENT_STATE);

	return rv;
}

static void print_st(struct drbd_conf *mdev, char *name, union drbd_state ns)
{
	dev_err(DEV, " %s = { cs:%s ro:%s/%s ds:%s/%s %c%c%c%c }\n",
	    name,
	    drbd_conn_str(ns.conn),
	    drbd_role_str(ns.role),
	    drbd_role_str(ns.peer),
	    drbd_disk_str(ns.disk),
	    drbd_disk_str(ns.pdsk),
	    is_susp(ns) ? 's' : 'r',
	    ns.aftr_isp ? 'a' : '-',
	    ns.peer_isp ? 'p' : '-',
	    ns.user_isp ? 'u' : '-'
	    );
}

void print_st_err(struct drbd_conf *mdev, union drbd_state os,
		  union drbd_state ns, enum drbd_state_rv err)
{
	if (err == SS_IN_TRANSIENT_STATE)
		return;
	dev_err(DEV, "State change failed: %s\n", drbd_set_st_err_str(err));
	print_st(mdev, " state", os);
	print_st(mdev, "wanted", ns);
}


/**
 * is_valid_state() - Returns an SS_ error code if ns is not valid
 * @mdev:	DRBD device.
 * @ns:		State to consider.
 */
static enum drbd_state_rv
is_valid_state(struct drbd_conf *mdev, union drbd_state ns)
{
	/* See drbd_state_sw_errors in drbd_strings.c */

	enum drbd_fencing_p fp;
	enum drbd_state_rv rv = SS_SUCCESS;

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	if (get_net_conf(mdev)) {
		if (!mdev->net_conf->two_primaries &&
		    ns.role == R_PRIMARY && ns.peer == R_PRIMARY)
			rv = SS_TWO_PRIMARIES;
		put_net_conf(mdev);
	}

	if (rv <= 0)
		/* already found a reason to abort */;
	else if (ns.role == R_SECONDARY && mdev->open_cnt)
		rv = SS_DEVICE_IN_USE;

	else if (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.disk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (fp >= FP_RESOURCE &&
		 ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk >= D_UNKNOWN)
		rv = SS_PRIMARY_NOP;

	else if (ns.role == R_PRIMARY && ns.disk <= D_INCONSISTENT && ns.pdsk <= D_INCONSISTENT)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_INCONSISTENT)
		rv = SS_NO_LOCAL_DISK;

	else if (ns.conn > C_CONNECTED && ns.pdsk < D_INCONSISTENT)
		rv = SS_NO_REMOTE_DISK;

	else if (ns.conn > C_CONNECTED && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE)
		rv = SS_NO_UP_TO_DATE_DISK;

	else if ((ns.conn == C_CONNECTED ||
		  ns.conn == C_WF_BITMAP_S ||
		  ns.conn == C_SYNC_SOURCE ||
		  ns.conn == C_PAUSED_SYNC_S) &&
		  ns.disk == D_OUTDATED)
		rv = SS_CONNECTED_OUTDATES;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		 (mdev->sync_conf.verify_alg[0] == 0))
		rv = SS_NO_VERIFY_ALG;

	else if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
		  mdev->agreed_pro_version < 88)
		rv = SS_NOT_SUPPORTED;

	else if (ns.conn >= C_CONNECTED && ns.pdsk == D_UNKNOWN)
		rv = SS_CONNECTED_OUTDATES;

	return rv;
}

/**
 * is_valid_state_transition() - Returns an SS_ error code if the state transition is not possible
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @os:		old state.
 */
static enum drbd_state_rv
is_valid_state_transition(struct drbd_conf *mdev, union drbd_state ns,
			  union drbd_state os)
{
	enum drbd_state_rv rv = SS_SUCCESS;

	if ((ns.conn == C_STARTING_SYNC_T || ns.conn == C_STARTING_SYNC_S) &&
	    os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if (ns.conn == C_DISCONNECTING && os.conn == C_STANDALONE)
		rv = SS_ALREADY_STANDALONE;

	if (ns.disk > D_ATTACHING && os.disk == D_DISKLESS)
		rv = SS_IS_DISKLESS;

	if (ns.conn == C_WF_CONNECTION && os.conn < C_UNCONNECTED)
		rv = SS_NO_NET_CONFIG;

	if (ns.disk == D_OUTDATED && os.disk < D_OUTDATED && os.disk != D_ATTACHING)
		rv = SS_LOWER_THAN_OUTDATED;

	if (ns.conn == C_DISCONNECTING && os.conn == C_UNCONNECTED)
		rv = SS_IN_TRANSIENT_STATE;

	if (ns.conn == os.conn && ns.conn == C_WF_REPORT_PARAMS)
		rv = SS_IN_TRANSIENT_STATE;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) && os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T) &&
	    ns.conn != os.conn && os.conn > C_CONNECTED)
		rv = SS_RESYNC_RUNNING;

	if ((ns.conn == C_STARTING_SYNC_S || ns.conn == C_STARTING_SYNC_T) &&
	    os.conn < C_CONNECTED)
		rv = SS_NEED_CONNECTION;

	if ((ns.conn == C_SYNC_TARGET || ns.conn == C_SYNC_SOURCE)
	    && os.conn < C_WF_REPORT_PARAMS)
		rv = SS_NEED_CONNECTION; /* No NetworkFailure -> SyncTarget etc... */

	return rv;
}

static void print_sanitize_warnings(struct drbd_conf *mdev, enum sanitize_state_warnings warn)
{
	static const char *msg_table[] = {
		[NO_WARNING] = "",
		[ABORTED_ONLINE_VERIFY] = "Online-verify aborted.",
		[ABORTED_RESYNC] = "Resync aborted.",
		[CONNECTION_LOST_NEGOTIATING] = "Connection lost while negotiating, no data!",
		[IMPLICITLY_UPGRADED_DISK] = "Implicitly upgraded disk",
		[IMPLICITLY_UPGRADED_PDSK] = "Implicitly upgraded pdsk",
	};

	if (warn != NO_WARNING)
		dev_warn(DEV, "%s\n", msg_table[warn]);
}

/**
 * sanitize_state() - Resolves implicitly necessary additional changes to a state transition
 * @mdev:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @warn:	optional out-parameter; receives a warning code for the caller to print, may be NULL.
 *
 * When we lose the connection, we have to set the state of the peer's disk (pdsk)
 * to D_UNKNOWN. This rule and many more along those lines are in this function.
 */
static union drbd_state sanitize_state(struct drbd_conf *mdev, union drbd_state os,
				       union drbd_state ns, enum sanitize_state_warnings *warn)
{
	enum drbd_fencing_p fp;
	enum drbd_disk_state disk_min, disk_max, pdsk_min, pdsk_max;

	if (warn)
		*warn = NO_WARNING;

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* Disallow Network errors to configure a device's network part */
	if ((ns.conn >= C_TIMEOUT && ns.conn <= C_TEAR_DOWN) &&
	    os.conn <= C_DISCONNECTING)
		ns.conn = os.conn;

	/* After a network error (+C_TEAR_DOWN) only C_UNCONNECTED or C_DISCONNECTING can follow.
	 * If you try to go into some Sync* state, that shall fail (elsewhere). */
	if (os.conn >= C_TIMEOUT && os.conn <= C_TEAR_DOWN &&
	    ns.conn != C_UNCONNECTED && ns.conn != C_DISCONNECTING && ns.conn <= C_CONNECTED)
		ns.conn = os.conn;

	/* we cannot fail (again) if we already detached */
	if (ns.disk == D_FAILED && os.disk == D_DISKLESS)
		ns.disk = D_DISKLESS;

	/* if we are only D_ATTACHING yet,
	 * we can (and should) go directly to D_DISKLESS. */
	if (ns.disk == D_FAILED && os.disk == D_ATTACHING)
		ns.disk = D_DISKLESS;

	/* After C_DISCONNECTING only C_STANDALONE may follow */
	if (os.conn == C_DISCONNECTING && ns.conn != C_STANDALONE)
		ns.conn = os.conn;

	if (ns.conn < C_CONNECTED) {
		ns.peer_isp = 0;
		ns.peer = R_UNKNOWN;
		if (ns.pdsk > D_UNKNOWN || ns.pdsk < D_INCONSISTENT)
			ns.pdsk = D_UNKNOWN;
	}

	/* Clear the aftr_isp when becoming unconfigured */
	if (ns.conn == C_STANDALONE && ns.disk == D_DISKLESS && ns.role == R_SECONDARY)
		ns.aftr_isp = 0;

	/* Abort resync if a disk fails/detaches */
	if (os.conn > C_CONNECTED && ns.conn > C_CONNECTED &&
	    (ns.disk <= D_FAILED || ns.pdsk <= D_FAILED)) {
		if (warn)
			*warn = os.conn == C_VERIFY_S || os.conn == C_VERIFY_T ?
				ABORTED_ONLINE_VERIFY : ABORTED_RESYNC;
		ns.conn = C_CONNECTED;
	}

	/* Connection breaks down before we finished "Negotiating" */
	if (ns.conn < C_CONNECTED && ns.disk == D_NEGOTIATING &&
	    get_ldev_if_state(mdev, D_NEGOTIATING)) {
		if (mdev->ed_uuid == mdev->ldev->md.uuid[UI_CURRENT]) {
			ns.disk = mdev->new_state_tmp.disk;
			ns.pdsk = mdev->new_state_tmp.pdsk;
		} else {
			if (warn)
				*warn = CONNECTION_LOST_NEGOTIATING;
			ns.disk = D_DISKLESS;
			ns.pdsk = D_UNKNOWN;
		}
		put_ldev(mdev);
	}

	/* D_CONSISTENT and D_OUTDATED vanish when we get connected */
	if (ns.conn >= C_CONNECTED && ns.conn < C_AHEAD) {
		if (ns.disk == D_CONSISTENT || ns.disk == D_OUTDATED)
			ns.disk = D_UP_TO_DATE;
		if (ns.pdsk == D_CONSISTENT || ns.pdsk == D_OUTDATED)
			ns.pdsk = D_UP_TO_DATE;
	}

	/* Implications of the connection state on the disk states */
	disk_min = D_DISKLESS;
	disk_max = D_UP_TO_DATE;
	pdsk_min = D_INCONSISTENT;
	pdsk_max = D_UNKNOWN;
	switch ((enum drbd_conns)ns.conn) {
	case C_WF_BITMAP_T:
	case C_PAUSED_SYNC_T:
	case C_STARTING_SYNC_T:
	case C_WF_SYNC_UUID:
	case C_BEHIND:
		disk_min = D_INCONSISTENT;
		disk_max = D_OUTDATED;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_VERIFY_S:
	case C_VERIFY_T:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_CONNECTED:
		disk_min = D_DISKLESS;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_DISKLESS;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_WF_BITMAP_S:
	case C_PAUSED_SYNC_S:
	case C_STARTING_SYNC_S:
	case C_AHEAD:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_CONSISTENT; /* D_OUTDATED would be nice. But explicit outdate necessary*/
		break;
	case C_SYNC_TARGET:
		disk_min = D_INCONSISTENT;
		disk_max = D_INCONSISTENT;
		pdsk_min = D_UP_TO_DATE;
		pdsk_max = D_UP_TO_DATE;
		break;
	case C_SYNC_SOURCE:
		disk_min = D_UP_TO_DATE;
		disk_max = D_UP_TO_DATE;
		pdsk_min = D_INCONSISTENT;
		pdsk_max = D_INCONSISTENT;
		break;
	case C_STANDALONE:
	case C_DISCONNECTING:
	case C_UNCONNECTED:
	case C_TIMEOUT:
	case C_BROKEN_PIPE:
	case C_NETWORK_FAILURE:
	case C_PROTOCOL_ERROR:
	case C_TEAR_DOWN:
	case C_WF_CONNECTION:
	case C_WF_REPORT_PARAMS:
	case C_MASK:
		break;
	}
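	/* now clamp both disk states into the [min, max] range allowed by ns.conn */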
	if (ns.disk > disk_max)
		ns.disk = disk_max;

	if (ns.disk < disk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_DISK;
		ns.disk = disk_min;
	}
	if (ns.pdsk > pdsk_max)
		ns.pdsk = pdsk_max;

	if (ns.pdsk < pdsk_min) {
		if (warn)
			*warn = IMPLICITLY_UPGRADED_PDSK;
		ns.pdsk = pdsk_min;
	}

	if (fp == FP_STONITH &&
	    (ns.role == R_PRIMARY && ns.conn < C_CONNECTED && ns.pdsk > D_OUTDATED) &&
	    !(os.role == R_PRIMARY && os.conn < C_CONNECTED && os.pdsk > D_OUTDATED))
		ns.susp_fen = 1; /* Suspend IO while fence-peer handler runs (peer lost) */

	if (mdev->sync_conf.on_no_data == OND_SUSPEND_IO &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE) &&
	    !(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE))
		ns.susp_nod = 1; /* Suspend IO while no data available (no accessible data available) */

	if (ns.aftr_isp || ns.peer_isp || ns.user_isp) {
		if (ns.conn == C_SYNC_SOURCE)
			ns.conn = C_PAUSED_SYNC_S;
		if (ns.conn == C_SYNC_TARGET)
			ns.conn = C_PAUSED_SYNC_T;
	} else {
		if (ns.conn == C_PAUSED_SYNC_S)
			ns.conn = C_SYNC_SOURCE;
		if (ns.conn == C_PAUSED_SYNC_T)
			ns.conn = C_SYNC_TARGET;
	}

	return ns;
}

/* helper for __drbd_set_state */
static void set_ov_position(struct drbd_conf *mdev, enum drbd_conns cs)
{
	if (mdev->agreed_pro_version < 90)
		mdev->ov_start_sector = 0;
	mdev->rs_total = drbd_bm_bits(mdev);
	mdev->ov_position = 0;
	if (cs == C_VERIFY_T) {
		/* starting online verify from an arbitrary position
		 * does not fit well into the existing protocol.
		 * on C_VERIFY_T, we initialize ov_left and friends
		 * implicitly in receive_DataRequest once the
		 * first P_OV_REQUEST is received */
		mdev->ov_start_sector = ~(sector_t)0;
	} else {
		unsigned long bit = BM_SECT_TO_BIT(mdev->ov_start_sector);
		if (bit >= mdev->rs_total) {
			mdev->ov_start_sector =
				BM_BIT_TO_SECT(mdev->rs_total - 1);
			mdev->rs_total = 1;
		} else
			mdev->rs_total -= bit;
		mdev->ov_position = mdev->ov_start_sector;
	}
	mdev->ov_left = mdev->rs_total;
}

static void drbd_resume_al(struct drbd_conf *mdev)
{
	if (test_and_clear_bit(AL_SUSPENDED, &mdev->flags))
		dev_info(DEV, "Resumed AL updates\n");
}

/**
 * __drbd_set_state() - Set a new DRBD state
 * @mdev:	DRBD device.
 * @ns:		new state.
 * @flags:	Flags
 * @done:	Optional completion, that will get completed after the after_state_ch() finished
 *
 * Caller needs to hold req_lock, and global_state_lock. Do not call directly.
 */
enum drbd_state_rv
__drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
		 enum chg_state_flags flags, struct completion *done)
{
	union drbd_state os;
	enum drbd_state_rv rv = SS_SUCCESS;
	enum sanitize_state_warnings ssw;
	struct after_state_chg_work *ascw;

	os = mdev->state;

	ns = sanitize_state(mdev, os, ns, &ssw);

	if (ns.i == os.i)
		return SS_NOTHING_TO_DO;

	if (!(flags & CS_HARD)) {
		/* pre-state-change checks ; only look at ns */
		/* See drbd_state_sw_errors in drbd_strings.c */

		rv = is_valid_state(mdev, ns);
		if (rv < SS_SUCCESS) {
			/* If the old state was illegal as well, then let
			   this happen...*/

			if (is_valid_state(mdev, os) == rv)
				rv = is_valid_state_transition(mdev, ns, os);
		} else
			rv = is_valid_state_transition(mdev, ns, os);
	}

	if (rv < SS_SUCCESS) {
		if (flags & CS_VERBOSE)
			print_st_err(mdev, os, ns, rv);
		return rv;
	}

	print_sanitize_warnings(mdev, ssw);

	{
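		/* build a human readable, one-line summary of every state field that changed */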
		char *pbp, pb[300];
		pbp = pb;
		*pbp = 0;
		if (ns.role != os.role)
			pbp += sprintf(pbp, "role( %s -> %s ) ",
				       drbd_role_str(os.role),
				       drbd_role_str(ns.role));
		if (ns.peer != os.peer)
			pbp += sprintf(pbp, "peer( %s -> %s ) ",
				       drbd_role_str(os.peer),
				       drbd_role_str(ns.peer));
		if (ns.conn != os.conn)
			pbp += sprintf(pbp, "conn( %s -> %s ) ",
				       drbd_conn_str(os.conn),
				       drbd_conn_str(ns.conn));
		if (ns.disk != os.disk)
			pbp += sprintf(pbp, "disk( %s -> %s ) ",
				       drbd_disk_str(os.disk),
				       drbd_disk_str(ns.disk));
		if (ns.pdsk != os.pdsk)
			pbp += sprintf(pbp, "pdsk( %s -> %s ) ",
				       drbd_disk_str(os.pdsk),
				       drbd_disk_str(ns.pdsk));
		if (is_susp(ns) != is_susp(os))
			pbp += sprintf(pbp, "susp( %d -> %d ) ",
				       is_susp(os),
				       is_susp(ns));
		if (ns.aftr_isp != os.aftr_isp)
			pbp += sprintf(pbp, "aftr_isp( %d -> %d ) ",
				       os.aftr_isp,
				       ns.aftr_isp);
		if (ns.peer_isp != os.peer_isp)
			pbp += sprintf(pbp, "peer_isp( %d -> %d ) ",
				       os.peer_isp,
				       ns.peer_isp);
		if (ns.user_isp != os.user_isp)
			pbp += sprintf(pbp, "user_isp( %d -> %d ) ",
				       os.user_isp,
				       ns.user_isp);
		dev_info(DEV, "%s\n", pb);
	}

	/* solve the race between becoming unconfigured,
	 * worker doing the cleanup, and
	 * admin reconfiguring us:
	 * on (re)configure, first set CONFIG_PENDING,
	 * then wait for a potentially exiting worker,
	 * start the worker, and schedule one no_op.
	 * then proceed with configuration.
	 */
	if (ns.disk == D_DISKLESS &&
	    ns.conn == C_STANDALONE &&
	    ns.role == R_SECONDARY &&
	    !test_and_set_bit(CONFIG_PENDING, &mdev->flags))
		set_bit(DEVICE_DYING, &mdev->flags);

	/* if we are going -> D_FAILED or D_DISKLESS, grab one extra reference
	 * on the ldev here, to be sure the transition -> D_DISKLESS resp.
	 * drbd_ldev_destroy() won't happen before our corresponding
	 * after_state_ch works run, where we put_ldev again. */
	if ((os.disk != D_FAILED && ns.disk == D_FAILED) ||
	    (os.disk != D_DISKLESS && ns.disk == D_DISKLESS))
		atomic_inc(&mdev->local_cnt);

	mdev->state = ns;

	if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING)
		drbd_print_uuids(mdev, "attached to UUIDs");

	wake_up(&mdev->misc_wait);
	wake_up(&mdev->state_wait);

	/* aborted verify run. log the last position */
	if ((os.conn == C_VERIFY_S || os.conn == C_VERIFY_T) &&
	    ns.conn < C_CONNECTED) {
		mdev->ov_start_sector =
			BM_BIT_TO_SECT(drbd_bm_bits(mdev) - mdev->ov_left);
		dev_info(DEV, "Online Verify reached sector %llu\n",
			(unsigned long long)mdev->ov_start_sector);
	}

	if ((os.conn == C_PAUSED_SYNC_T || os.conn == C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_TARGET  || ns.conn == C_SYNC_SOURCE)) {
		dev_info(DEV, "Syncer continues.\n");
		mdev->rs_paused += (long)jiffies
				  -(long)mdev->rs_mark_time[mdev->rs_last_mark];
		if (ns.conn == C_SYNC_TARGET)
			mod_timer(&mdev->resync_timer, jiffies);
	}

	if ((os.conn == C_SYNC_TARGET  || os.conn == C_SYNC_SOURCE) &&
	    (ns.conn == C_PAUSED_SYNC_T || ns.conn == C_PAUSED_SYNC_S)) {
		dev_info(DEV, "Resync suspended\n");
		mdev->rs_mark_time[mdev->rs_last_mark] = jiffies;
	}

	if (os.conn == C_CONNECTED &&
	    (ns.conn == C_VERIFY_S || ns.conn == C_VERIFY_T)) {
		unsigned long now = jiffies;
		int i;

		set_ov_position(mdev, ns.conn);
		mdev->rs_start = now;
		mdev->rs_last_events = 0;
		mdev->rs_last_sect_ev = 0;
		mdev->ov_last_oos_size = 0;
		mdev->ov_last_oos_start = 0;

		for (i = 0; i < DRBD_SYNC_MARKS; i++) {
			mdev->rs_mark_left[i] = mdev->ov_left;
			mdev->rs_mark_time[i] = now;
		}

		drbd_rs_controller_reset(mdev);

		if (ns.conn == C_VERIFY_S) {
			dev_info(DEV, "Starting Online Verify from sector %llu\n",
					(unsigned long long)mdev->ov_position);
			mod_timer(&mdev->resync_timer, jiffies);
		}
	}

	if (get_ldev(mdev)) {
		u32 mdf = mdev->ldev->md.flags & ~(MDF_CONSISTENT|MDF_PRIMARY_IND|
						 MDF_CONNECTED_IND|MDF_WAS_UP_TO_DATE|
						 MDF_PEER_OUT_DATED|MDF_CRASHED_PRIMARY);

		if (test_bit(CRASHED_PRIMARY, &mdev->flags))
			mdf |= MDF_CRASHED_PRIMARY;
		if (mdev->state.role == R_PRIMARY ||
		    (mdev->state.pdsk < D_INCONSISTENT && mdev->state.peer == R_PRIMARY))
			mdf |= MDF_PRIMARY_IND;
		if (mdev->state.conn > C_WF_REPORT_PARAMS)
			mdf |= MDF_CONNECTED_IND;
		if (mdev->state.disk > D_INCONSISTENT)
			mdf |= MDF_CONSISTENT;
		if (mdev->state.disk > D_OUTDATED)
			mdf |= MDF_WAS_UP_TO_DATE;
		if (mdev->state.pdsk <= D_OUTDATED && mdev->state.pdsk >= D_INCONSISTENT)
			mdf |= MDF_PEER_OUT_DATED;
		if (mdf != mdev->ldev->md.flags) {
			mdev->ldev->md.flags = mdf;
			drbd_md_mark_dirty(mdev);
		}
		if (os.disk < D_CONSISTENT && ns.disk >= D_CONSISTENT)
			drbd_set_ed_uuid(mdev, mdev->ldev->md.uuid[UI_CURRENT]);
		put_ldev(mdev);
	}

	/* Peer was forced D_UP_TO_DATE & R_PRIMARY, consider to resync */
	if (os.disk == D_INCONSISTENT && os.pdsk == D_INCONSISTENT &&
	    os.peer == R_SECONDARY && ns.peer == R_PRIMARY)
		set_bit(CONSIDER_RESYNC, &mdev->flags);

	/* Receiver should clean up itself */
	if (os.conn != C_DISCONNECTING && ns.conn == C_DISCONNECTING)
		drbd_thread_stop_nowait(&mdev->receiver);

	/* Now the receiver finished cleaning up itself, it should die */
	if (os.conn != C_STANDALONE && ns.conn == C_STANDALONE)
		drbd_thread_stop_nowait(&mdev->receiver);

	/* Upon network failure, we need to restart the receiver. */
	if (os.conn > C_WF_CONNECTION &&
	    ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
		drbd_thread_restart_nowait(&mdev->receiver);

	/* Resume AL writing if we get a connection */
	if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
		drbd_resume_al(mdev);

	ascw = kmalloc(sizeof(*ascw), GFP_ATOMIC);
	if (ascw) {
		ascw->os = os;
		ascw->ns = ns;
		ascw->flags = flags;
		ascw->w.cb = w_after_state_ch;
		ascw->done = done;
		drbd_queue_work(&mdev->data.work, &ascw->w);
	} else {
		dev_warn(DEV, "Could not kmalloc an ascw\n");
	}

	return rv;
}

static int w_after_state_ch(struct drbd_conf *mdev, struct drbd_work *w, int unused)
{
	struct after_state_chg_work *ascw =
		container_of(w, struct after_state_chg_work, w);
	after_state_ch(mdev, ascw->os, ascw->ns, ascw->flags);
	if (ascw->flags & CS_WAIT_COMPLETE) {
		D_ASSERT(ascw->done != NULL);
		complete(ascw->done);
	}
	kfree(ascw);

	return 1;
}

static void abw_start_sync(struct drbd_conf *mdev, int rv)
{
	if (rv) {
		dev_err(DEV, "Writing the bitmap failed not starting resync.\n");
		_drbd_request_state(mdev, NS(conn, C_CONNECTED), CS_VERBOSE);
		return;
	}

	switch (mdev->state.conn) {
	case C_STARTING_SYNC_T:
		_drbd_request_state(mdev, NS(conn, C_WF_SYNC_UUID), CS_VERBOSE);
		break;
	case C_STARTING_SYNC_S:
		drbd_start_resync(mdev, C_SYNC_SOURCE);
		break;
	}
}

int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
			       int (*io_fn)(struct drbd_conf *),
			       char *why, enum bm_flag flags)
{
	int rv;

	D_ASSERT(current == mdev->worker.task);

	/* open coded non-blocking drbd_suspend_io(mdev); */
	set_bit(SUSPEND_IO, &mdev->flags);

	drbd_bm_lock(mdev, why, flags);
	rv = io_fn(mdev);
	drbd_bm_unlock(mdev);

	drbd_resume_io(mdev);

	return rv;
}

/**
 * after_state_ch() - Perform after state change actions that may sleep
 * @mdev:	DRBD device.
 * @os:		old state.
 * @ns:		new state.
 * @flags:	Flags
 */
static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
			   union drbd_state ns, enum chg_state_flags flags)
{
	enum drbd_fencing_p fp;
	enum drbd_req_event what = nothing;
	union drbd_state nsm = (union drbd_state){ .i = -1 };

	if (os.conn != C_CONNECTED && ns.conn == C_CONNECTED) {
		clear_bit(CRASHED_PRIMARY, &mdev->flags);
		if (mdev->p_uuid)
			mdev->p_uuid[UI_FLAGS] &= ~((u64)2);
	}

	fp = FP_DONT_CARE;
	if (get_ldev(mdev)) {
		fp = mdev->ldev->dc.fencing;
		put_ldev(mdev);
	}

	/* Inform userspace about the change... */
	drbd_bcast_state(mdev, ns);

	if (!(os.role == R_PRIMARY && os.disk < D_UP_TO_DATE && os.pdsk < D_UP_TO_DATE) &&
	    (ns.role == R_PRIMARY && ns.disk < D_UP_TO_DATE && ns.pdsk < D_UP_TO_DATE))
		drbd_khelper(mdev, "pri-on-incon-degr");

	/* Here we have the actions that are performed after a
	   state change. This function might sleep */

	if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
		mod_timer(&mdev->request_timer, jiffies + HZ);

	nsm.i = -1;
	if (ns.susp_nod) {
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
			what = resend;

		if ((os.disk == D_ATTACHING || os.disk == D_NEGOTIATING) &&
		    ns.disk > D_NEGOTIATING)
			what = restart_frozen_disk_io;

		if (what != nothing)
			nsm.susp_nod = 0;
	}

	if (ns.susp_fen) {
		/* case1: The outdate peer handler is successful: */
		if (os.pdsk > D_OUTDATED && ns.pdsk <= D_OUTDATED) {
			tl_clear(mdev);
			if (test_bit(NEW_CUR_UUID, &mdev->flags)) {
				drbd_uuid_new_current(mdev);
				clear_bit(NEW_CUR_UUID, &mdev->flags);
			}
			spin_lock_irq(&mdev->req_lock);
			_drbd_set_state(_NS(mdev, susp_fen, 0), CS_VERBOSE, NULL);
			spin_unlock_irq(&mdev->req_lock);
		}
		/* case2: The connection was established again: */
		if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED) {
			clear_bit(NEW_CUR_UUID, &mdev->flags);
			what = resend;
			nsm.susp_fen = 0;
		}
	}

	if (what != nothing) {
		spin_lock_irq(&mdev->req_lock);
		_tl_restart(mdev, what);
		nsm.i &= mdev->state.i;
		_drbd_set_state(mdev, nsm, CS_VERBOSE, NULL);
		spin_unlock_irq(&mdev->req_lock);
	}

	/* Became sync source.  With protocol >= 96, we still need to send out
	 * the sync uuid now. Need to do that before any drbd_send_state, or
	 * the other side may go "paused sync" before receiving the sync uuids,
	 * which is unexpected. */
	if ((os.conn != C_SYNC_SOURCE && os.conn != C_PAUSED_SYNC_S) &&
	    (ns.conn == C_SYNC_SOURCE || ns.conn == C_PAUSED_SYNC_S) &&
	    mdev->agreed_pro_version >= 96 && get_ldev(mdev)) {
		drbd_gen_and_send_sync_uuid(mdev);
		put_ldev(mdev);
	}

	/* Do not change the order of the if above and the two below... */
	if (os.pdsk == D_DISKLESS && ns.pdsk > D_DISKLESS) {      /* attach on the peer */
		drbd_send_uuids(mdev);
		drbd_send_state(mdev, ns);
	}
	/* No point in queuing send_bitmap if we don't have a connection
	 * anymore, so check also the _current_ state, not only the new state
	 * at the time this work was queued. */
	if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
	    mdev->state.conn == C_WF_BITMAP_S)
		drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
				"send_bitmap (WFBitMapS)",
				BM_LOCKED_TEST_ALLOWED);

	/* Lost contact to peer's copy of the data */
	if ((os.pdsk >= D_INCONSISTENT &&
	     os.pdsk != D_UNKNOWN &&
	     os.pdsk != D_OUTDATED)
	&&  (ns.pdsk < D_INCONSISTENT ||
	     ns.pdsk == D_UNKNOWN ||
	     ns.pdsk == D_OUTDATED)) {
		if (get_ldev(mdev)) {
			if ((ns.role == R_PRIMARY || ns.peer == R_PRIMARY) &&
			    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
				if (is_susp(mdev->state)) {
					set_bit(NEW_CUR_UUID, &mdev->flags);
				} else {
					drbd_uuid_new_current(mdev);
					drbd_send_uuids(mdev);
				}
			}
			put_ldev(mdev);
		}
	}

	if (ns.pdsk < D_INCONSISTENT && get_ldev(mdev)) {
		if (os.peer == R_SECONDARY && ns.peer == R_PRIMARY &&
		    mdev->ldev->md.uuid[UI_BITMAP] == 0 && ns.disk >= D_UP_TO_DATE) {
			drbd_uuid_new_current(mdev);
			drbd_send_uuids(mdev);
		}
		/* D_DISKLESS Peer becomes secondary */
		if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
			/* We may still be Primary ourselves.
			 * No harm done if the bitmap still changes,
			 * redirtied pages will follow later. */
			drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
				"demote diskless peer", BM_LOCKED_SET_ALLOWED);
		put_ldev(mdev);
	}

Lars Ellenberg06d33e92010-12-18 17:00:59 +01001538 /* Write out all changed bits on demote.
1539	 * Though, no need to do that just yet
1540 * if there is a resync going on still */
1541 if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1542 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001543 /* No changes to the bitmap expected this time, so assert that,
1544	 * even though no harm would be done if it did change. */
1545 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1546 "demote", BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001547 put_ldev(mdev);
1548 }
1549
1550 /* Last part of the attaching process ... */
1551 if (ns.conn >= C_CONNECTED &&
1552 os.disk == D_ATTACHING && ns.disk == D_NEGOTIATING) {
Philipp Reisnere89b5912010-03-24 17:11:33 +01001553 drbd_send_sizes(mdev, 0, 0); /* to start sync... */
Philipp Reisnerb411b362009-09-25 16:07:19 -07001554 drbd_send_uuids(mdev);
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001555 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001556 }
1557
1558 /* We want to pause/continue resync, tell peer. */
1559 if (ns.conn >= C_CONNECTED &&
1560 ((os.aftr_isp != ns.aftr_isp) ||
1561 (os.user_isp != ns.user_isp)))
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001562 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001563
1564 /* In case one of the isp bits got set, suspend other devices. */
1565 if ((!os.aftr_isp && !os.peer_isp && !os.user_isp) &&
1566 (ns.aftr_isp || ns.peer_isp || ns.user_isp))
1567 suspend_other_sg(mdev);
1568
1569	 /* Make sure the peer gets informed about possible state
1570 changes (ISP bits) while we were in WFReportParams. */
1571 if (os.conn == C_WF_REPORT_PARAMS && ns.conn >= C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001572 drbd_send_state(mdev, ns);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001573
Philipp Reisner67531712010-10-27 12:21:30 +02001574 if (os.conn != C_AHEAD && ns.conn == C_AHEAD)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001575 drbd_send_state(mdev, ns);
Philipp Reisner67531712010-10-27 12:21:30 +02001576
Philipp Reisnerb411b362009-09-25 16:07:19 -07001577 /* We are in the progress to start a full sync... */
1578 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1579 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001580 /* no other bitmap changes expected during this phase */
1581 drbd_queue_bitmap_io(mdev,
1582 &drbd_bmio_set_n_write, &abw_start_sync,
1583 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001584
1585	 /* We are invalidating ourselves... */
1586 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1587 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001588 /* other bitmap operation expected during this phase */
1589 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1590 "set_n_write from invalidate", BM_LOCKED_MASK);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001591
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001592 /* first half of local IO error, failure to attach,
1593 * or administrative detach */
1594 if (os.disk != D_FAILED && ns.disk == D_FAILED) {
1595 enum drbd_io_error_p eh;
1596 int was_io_error;
1597 /* corresponding get_ldev was in __drbd_set_state, to serialize
1598 * our cleanup here with the transition to D_DISKLESS,
1599	 * so it is safe to dereference ldev here. */
1600 eh = mdev->ldev->dc.on_io_error;
1601 was_io_error = test_and_clear_bit(WAS_IO_ERROR, &mdev->flags);
1602
Philipp Reisner2b4dd362011-03-14 13:01:50 +01001603	/* Immediately allow completion of all application IO that waits
1604 for completion from the local disk. */
Philipp Reisnerfd2491f2011-07-18 16:25:15 +02001605 tl_abort_disk_io(mdev);
Philipp Reisner2b4dd362011-03-14 13:01:50 +01001606
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001607 /* current state still has to be D_FAILED,
1608 * there is only one way out: to D_DISKLESS,
1609 * and that may only happen after our put_ldev below. */
1610 if (mdev->state.disk != D_FAILED)
1611 dev_err(DEV,
1612 "ASSERT FAILED: disk is %s during detach\n",
1613 drbd_disk_str(mdev->state.disk));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001614
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001615 if (drbd_send_state(mdev, ns))
Lars Ellenberg07667342011-06-21 01:13:37 +02001616 dev_info(DEV, "Notified peer that I am detaching my disk\n");
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001617
1618 drbd_rs_cancel_all(mdev);
1619
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001620 /* In case we want to get something to stable storage still,
1621 * this may be the last chance.
1622 * Following put_ldev may transition to D_DISKLESS. */
1623 drbd_md_sync(mdev);
1624 put_ldev(mdev);
1625
1626 if (was_io_error && eh == EP_CALL_HELPER)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001627 drbd_khelper(mdev, "local-io-error");
1628 }
1629
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001630 /* second half of local IO error, failure to attach,
1631 * or administrative detach,
1632 * after local_cnt references have reached zero again */
1633 if (os.disk != D_DISKLESS && ns.disk == D_DISKLESS) {
1634 /* We must still be diskless,
1635 * re-attach has to be serialized with this! */
1636 if (mdev->state.disk != D_DISKLESS)
1637 dev_err(DEV,
1638 "ASSERT FAILED: disk is %s while going diskless\n",
1639 drbd_disk_str(mdev->state.disk));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001640
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001641 mdev->rs_total = 0;
1642 mdev->rs_failed = 0;
1643 atomic_set(&mdev->rs_pending_cnt, 0);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02001644
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001645 if (drbd_send_state(mdev, ns))
Lars Ellenberg07667342011-06-21 01:13:37 +02001646 dev_info(DEV, "Notified peer that I'm now diskless.\n");
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001647 /* corresponding get_ldev in __drbd_set_state
Lucas De Marchi25985ed2011-03-30 22:57:33 -03001648 * this may finally trigger drbd_ldev_destroy. */
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02001649 put_ldev(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07001650 }
1651
Philipp Reisner738a84b2011-03-03 00:21:30 +01001652	/* Notify peer that I had a local IO error, and did not detach. */
1653 if (os.disk == D_UP_TO_DATE && ns.disk == D_INCONSISTENT)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001654 drbd_send_state(mdev, ns);
Philipp Reisner738a84b2011-03-03 00:21:30 +01001655
Philipp Reisnerb411b362009-09-25 16:07:19 -07001656 /* Disks got bigger while they were detached */
1657 if (ns.disk > D_NEGOTIATING && ns.pdsk > D_NEGOTIATING &&
1658 test_and_clear_bit(RESYNC_AFTER_NEG, &mdev->flags)) {
1659 if (ns.conn == C_CONNECTED)
1660 resync_after_online_grow(mdev);
1661 }
1662
1663 /* A resync finished or aborted, wake paused devices... */
1664 if ((os.conn > C_CONNECTED && ns.conn <= C_CONNECTED) ||
1665 (os.peer_isp && !ns.peer_isp) ||
1666 (os.user_isp && !ns.user_isp))
1667 resume_next_sg(mdev);
1668
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001669 /* sync target done with resync. Explicitly notify peer, even though
1670	 * it should (at least for non-empty resyncs) already know it by itself. */
1671 if (os.disk < D_UP_TO_DATE && os.conn >= C_SYNC_SOURCE && ns.conn == C_CONNECTED)
Lars Ellenbergf479ea02011-10-27 16:52:30 +02001672 drbd_send_state(mdev, ns);
Lars Ellenbergaf85e8e2010-10-07 16:07:55 +02001673
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001674 /* This triggers bitmap writeout of potentially still unwritten pages
1675 * if the resync finished cleanly, or aborted because of peer disk
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001676 * failure, or because of connection loss.
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001677 * For resync aborted because of local disk failure, we cannot do
1678 * any bitmap writeout anymore.
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001679 * No harm done if some bits change during this phase.
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001680 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01001681 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
1682 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
1683 "write from resync_finished", BM_LOCKED_SET_ALLOWED);
Lars Ellenberg79a30d22011-01-20 10:32:05 +01001684 put_ldev(mdev);
1685 }
Lars Ellenberg02851e92010-12-16 14:47:39 +01001686
Philipp Reisnerf70b35112010-06-24 14:34:40 +02001687	/* free tl_hash if we got thawed and are C_STANDALONE */
Philipp Reisnerfb22c402010-09-08 23:20:21 +02001688 if (ns.conn == C_STANDALONE && !is_susp(ns) && mdev->tl_hash)
Philipp Reisnerf70b35112010-06-24 14:34:40 +02001689 drbd_free_tl_hash(mdev);
1690
Philipp Reisnerb411b362009-09-25 16:07:19 -07001691 /* Upon network connection, we need to start the receiver */
1692 if (os.conn == C_STANDALONE && ns.conn == C_UNCONNECTED)
1693 drbd_thread_start(&mdev->receiver);
1694
1695 /* Terminate worker thread if we are unconfigured - it will be
1696 restarted as needed... */
1697 if (ns.disk == D_DISKLESS &&
1698 ns.conn == C_STANDALONE &&
1699 ns.role == R_SECONDARY) {
1700 if (os.aftr_isp != ns.aftr_isp)
1701 resume_next_sg(mdev);
1702 /* set in __drbd_set_state, unless CONFIG_PENDING was set */
1703 if (test_bit(DEVICE_DYING, &mdev->flags))
1704 drbd_thread_stop_nowait(&mdev->worker);
1705 }
1706
1707 drbd_md_sync(mdev);
1708}
1709
1710
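/* Common kthread entry point for the receiver, worker and asender threads:
 * runs thi->function, and if the thread state was meanwhile set to Restarting
 * (e.g. a re-connect request came in while the receiver was Exiting), loops
 * and runs it again instead of terminating. Drops the module reference that
 * drbd_thread_start() took. */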
1711static int drbd_thread_setup(void *arg)
1712{
1713 struct drbd_thread *thi = (struct drbd_thread *) arg;
1714 struct drbd_conf *mdev = thi->mdev;
1715 unsigned long flags;
1716 int retval;
1717
1718restart:
1719 retval = thi->function(thi);
1720
1721 spin_lock_irqsave(&thi->t_lock, flags);
1722
1723 /* if the receiver has been "Exiting", the last thing it did
1724 * was set the conn state to "StandAlone",
1725 * if now a re-connect request comes in, conn state goes C_UNCONNECTED,
1726 * and receiver thread will be "started".
1727 * drbd_thread_start needs to set "Restarting" in that case.
1728 * t_state check and assignment needs to be within the same spinlock,
1729 * so either thread_start sees Exiting, and can remap to Restarting,
1730	 * or thread_start sees None, and can proceed as normal.
1731 */
1732
1733 if (thi->t_state == Restarting) {
1734 dev_info(DEV, "Restarting %s\n", current->comm);
1735 thi->t_state = Running;
1736 spin_unlock_irqrestore(&thi->t_lock, flags);
1737 goto restart;
1738 }
1739
1740 thi->task = NULL;
1741 thi->t_state = None;
1742 smp_mb();
1743 complete(&thi->stop);
1744 spin_unlock_irqrestore(&thi->t_lock, flags);
1745
1746 dev_info(DEV, "Terminating %s\n", current->comm);
1747
1748 /* Release mod reference taken when thread was started */
1749 module_put(THIS_MODULE);
1750 return retval;
1751}
1752
1753static void drbd_thread_init(struct drbd_conf *mdev, struct drbd_thread *thi,
1754 int (*func) (struct drbd_thread *))
1755{
1756 spin_lock_init(&thi->t_lock);
1757 thi->task = NULL;
1758 thi->t_state = None;
1759 thi->function = func;
1760 thi->mdev = mdev;
1761}
1762
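/* Start (or, if it is currently Exiting, mark for restart) one of the three
 * per-device threads. Takes a module reference, which drbd_thread_setup()
 * drops again when the thread terminates. Returns true on success (or if the
 * thread is already Running/Restarting), false if the kthread could not be
 * created or the module reference could not be taken. */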
1763int drbd_thread_start(struct drbd_thread *thi)
1764{
1765 struct drbd_conf *mdev = thi->mdev;
1766 struct task_struct *nt;
1767 unsigned long flags;
1768
1769 const char *me =
1770 thi == &mdev->receiver ? "receiver" :
1771 thi == &mdev->asender ? "asender" :
1772 thi == &mdev->worker ? "worker" : "NONSENSE";
1773
1774 /* is used from state engine doing drbd_thread_stop_nowait,
1775 * while holding the req lock irqsave */
1776 spin_lock_irqsave(&thi->t_lock, flags);
1777
1778 switch (thi->t_state) {
1779 case None:
1780 dev_info(DEV, "Starting %s thread (from %s [%d])\n",
1781 me, current->comm, current->pid);
1782
1783 /* Get ref on module for thread - this is released when thread exits */
1784 if (!try_module_get(THIS_MODULE)) {
1785 dev_err(DEV, "Failed to get module reference in drbd_thread_start\n");
1786 spin_unlock_irqrestore(&thi->t_lock, flags);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001787 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001788 }
1789
1790 init_completion(&thi->stop);
1791 D_ASSERT(thi->task == NULL);
1792 thi->reset_cpu_mask = 1;
1793 thi->t_state = Running;
1794 spin_unlock_irqrestore(&thi->t_lock, flags);
1795		flush_signals(current); /* otherwise we may get -ERESTARTNOINTR */
1796
1797 nt = kthread_create(drbd_thread_setup, (void *) thi,
1798 "drbd%d_%s", mdev_to_minor(mdev), me);
1799
1800 if (IS_ERR(nt)) {
1801 dev_err(DEV, "Couldn't start thread\n");
1802
1803 module_put(THIS_MODULE);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001804 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001805 }
1806 spin_lock_irqsave(&thi->t_lock, flags);
1807 thi->task = nt;
1808 thi->t_state = Running;
1809 spin_unlock_irqrestore(&thi->t_lock, flags);
1810 wake_up_process(nt);
1811 break;
1812 case Exiting:
1813 thi->t_state = Restarting;
1814 dev_info(DEV, "Restarting %s thread (from %s [%d])\n",
1815 me, current->comm, current->pid);
1816 /* fall through */
1817 case Running:
1818 case Restarting:
1819 default:
1820 spin_unlock_irqrestore(&thi->t_lock, flags);
1821 break;
1822 }
1823
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001824 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001825}
1826
1827
1828void _drbd_thread_stop(struct drbd_thread *thi, int restart, int wait)
1829{
1830 unsigned long flags;
1831
1832 enum drbd_thread_state ns = restart ? Restarting : Exiting;
1833
1834 /* may be called from state engine, holding the req lock irqsave */
1835 spin_lock_irqsave(&thi->t_lock, flags);
1836
1837 if (thi->t_state == None) {
1838 spin_unlock_irqrestore(&thi->t_lock, flags);
1839 if (restart)
1840 drbd_thread_start(thi);
1841 return;
1842 }
1843
1844 if (thi->t_state != ns) {
1845 if (thi->task == NULL) {
1846 spin_unlock_irqrestore(&thi->t_lock, flags);
1847 return;
1848 }
1849
1850 thi->t_state = ns;
1851 smp_mb();
1852 init_completion(&thi->stop);
1853 if (thi->task != current)
1854 force_sig(DRBD_SIGKILL, thi->task);
1855
1856 }
1857
1858 spin_unlock_irqrestore(&thi->t_lock, flags);
1859
1860 if (wait)
1861 wait_for_completion(&thi->stop);
1862}
1863
1864#ifdef CONFIG_SMP
1865/**
1866 * drbd_calc_cpu_mask() - Generate CPU masks, spread over all CPUs
1867 * @mdev: DRBD device.
1868 *
1869 * Forces all threads of a device onto the same CPU. This is beneficial for
1870 * DRBD's performance. May be overridden by the user's configuration.
1871 */
1872void drbd_calc_cpu_mask(struct drbd_conf *mdev)
1873{
1874 int ord, cpu;
1875
1876 /* user override. */
1877 if (cpumask_weight(mdev->cpu_mask))
1878 return;
1879
1880 ord = mdev_to_minor(mdev) % cpumask_weight(cpu_online_mask);
1881 for_each_online_cpu(cpu) {
1882 if (ord-- == 0) {
1883 cpumask_set_cpu(cpu, mdev->cpu_mask);
1884 return;
1885 }
1886 }
1887 /* should not be reached */
1888 cpumask_setall(mdev->cpu_mask);
1889}
1890
1891/**
1892 * drbd_thread_current_set_cpu() - modifies the cpu mask of the _current_ thread
1893 * @mdev: DRBD device.
1894 *
1895 * call in the "main loop" of _all_ threads, no need for any mutex, current won't die
1896 * prematurely.
1897 */
1898void drbd_thread_current_set_cpu(struct drbd_conf *mdev)
1899{
1900 struct task_struct *p = current;
1901 struct drbd_thread *thi =
1902 p == mdev->asender.task ? &mdev->asender :
1903 p == mdev->receiver.task ? &mdev->receiver :
1904 p == mdev->worker.task ? &mdev->worker :
1905 NULL;
1906 ERR_IF(thi == NULL)
1907 return;
1908 if (!thi->reset_cpu_mask)
1909 return;
1910 thi->reset_cpu_mask = 0;
1911 set_cpus_allowed_ptr(p, mdev->cpu_mask);
1912}
1913#endif
1914
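/* Fill in the p_header80 in front of the payload (magic, 16 bit command,
 * 16 bit length that excludes the header itself) and push the whole buffer
 * of 'size' bytes out on the given socket. Returns nonzero only if the
 * complete buffer was sent. */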
1915/* the appropriate socket mutex must be held already */
1916int _drbd_send_cmd(struct drbd_conf *mdev, struct socket *sock,
Philipp Reisner0b70a132010-08-20 13:36:10 +02001917 enum drbd_packets cmd, struct p_header80 *h,
Philipp Reisnerb411b362009-09-25 16:07:19 -07001918 size_t size, unsigned msg_flags)
1919{
1920 int sent, ok;
1921
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01001922 ERR_IF(!h) return false;
1923 ERR_IF(!size) return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001924
1925 h->magic = BE_DRBD_MAGIC;
1926 h->command = cpu_to_be16(cmd);
Philipp Reisner0b70a132010-08-20 13:36:10 +02001927 h->length = cpu_to_be16(size-sizeof(struct p_header80));
Philipp Reisnerb411b362009-09-25 16:07:19 -07001928
Philipp Reisnerb411b362009-09-25 16:07:19 -07001929 sent = drbd_send(mdev, sock, h, size, msg_flags);
1930
1931 ok = (sent == size);
Lars Ellenberg0ddc5542011-01-21 12:35:15 +01001932 if (!ok && !signal_pending(current))
1933 dev_warn(DEV, "short sent %s size=%d sent=%d\n",
Philipp Reisnerb411b362009-09-25 16:07:19 -07001934 cmdname(cmd), (int)size, sent);
1935 return ok;
1936}
1937
1938/* don't pass the socket. we may only look at it
1939 * when we hold the appropriate socket mutex.
1940 */
1941int drbd_send_cmd(struct drbd_conf *mdev, int use_data_socket,
Philipp Reisner0b70a132010-08-20 13:36:10 +02001942 enum drbd_packets cmd, struct p_header80 *h, size_t size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07001943{
1944 int ok = 0;
1945 struct socket *sock;
1946
1947 if (use_data_socket) {
1948 mutex_lock(&mdev->data.mutex);
1949 sock = mdev->data.socket;
1950 } else {
1951 mutex_lock(&mdev->meta.mutex);
1952 sock = mdev->meta.socket;
1953 }
1954
1955 /* drbd_disconnect() could have called drbd_free_sock()
1956 * while we were waiting in down()... */
1957 if (likely(sock != NULL))
1958 ok = _drbd_send_cmd(mdev, sock, cmd, h, size, 0);
1959
1960 if (use_data_socket)
1961 mutex_unlock(&mdev->data.mutex);
1962 else
1963 mutex_unlock(&mdev->meta.mutex);
1964 return ok;
1965}
1966
1967int drbd_send_cmd2(struct drbd_conf *mdev, enum drbd_packets cmd, char *data,
1968 size_t size)
1969{
Philipp Reisner0b70a132010-08-20 13:36:10 +02001970 struct p_header80 h;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001971 int ok;
1972
1973 h.magic = BE_DRBD_MAGIC;
1974 h.command = cpu_to_be16(cmd);
1975 h.length = cpu_to_be16(size);
1976
1977 if (!drbd_get_data_sock(mdev))
1978 return 0;
1979
Philipp Reisnerb411b362009-09-25 16:07:19 -07001980 ok = (sizeof(h) ==
1981 drbd_send(mdev, mdev->data.socket, &h, sizeof(h), 0));
1982 ok = ok && (size ==
1983 drbd_send(mdev, mdev->data.socket, data, size, 0));
1984
1985 drbd_put_data_sock(mdev);
1986
1987 return ok;
1988}
1989
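/* Send our syncer configuration. The packet layout depends on the agreed
 * protocol version: plain p_rs_param up to apv 87, plus the verify_alg
 * string for apv 88, p_rs_param_89 up to apv 94, and p_rs_param_95 with
 * the additional c_* resync controller fields from apv 95 on. */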
1990int drbd_send_sync_param(struct drbd_conf *mdev, struct syncer_conf *sc)
1991{
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02001992 struct p_rs_param_95 *p;
Philipp Reisnerb411b362009-09-25 16:07:19 -07001993 struct socket *sock;
1994 int size, rv;
1995 const int apv = mdev->agreed_pro_version;
1996
1997 size = apv <= 87 ? sizeof(struct p_rs_param)
1998 : apv == 88 ? sizeof(struct p_rs_param)
1999 + strlen(mdev->sync_conf.verify_alg) + 1
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002000 : apv <= 94 ? sizeof(struct p_rs_param_89)
2001 : /* apv >= 95 */ sizeof(struct p_rs_param_95);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002002
2003 /* used from admin command context and receiver/worker context.
2004 * to avoid kmalloc, grab the socket right here,
2005 * then use the pre-allocated sbuf there */
2006 mutex_lock(&mdev->data.mutex);
2007 sock = mdev->data.socket;
2008
2009 if (likely(sock != NULL)) {
2010 enum drbd_packets cmd = apv >= 89 ? P_SYNC_PARAM89 : P_SYNC_PARAM;
2011
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002012 p = &mdev->data.sbuf.rs_param_95;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002013
2014 /* initialize verify_alg and csums_alg */
2015 memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX);
2016
2017 p->rate = cpu_to_be32(sc->rate);
Philipp Reisner8e26f9c2010-07-06 17:25:54 +02002018 p->c_plan_ahead = cpu_to_be32(sc->c_plan_ahead);
2019 p->c_delay_target = cpu_to_be32(sc->c_delay_target);
2020 p->c_fill_target = cpu_to_be32(sc->c_fill_target);
2021 p->c_max_rate = cpu_to_be32(sc->c_max_rate);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002022
2023 if (apv >= 88)
2024 strcpy(p->verify_alg, mdev->sync_conf.verify_alg);
2025 if (apv >= 89)
2026 strcpy(p->csums_alg, mdev->sync_conf.csums_alg);
2027
2028 rv = _drbd_send_cmd(mdev, sock, cmd, &p->head, size, 0);
2029 } else
2030 rv = 0; /* not ok */
2031
2032 mutex_unlock(&mdev->data.mutex);
2033
2034 return rv;
2035}
2036
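/* Send our connection options (wire protocol, after-split-brain policies,
 * two-primaries flag, integrity algorithm) to the peer. Fails with -1 if
 * --dry-run is requested but the peer is too old (apv < 92) to support it. */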
2037int drbd_send_protocol(struct drbd_conf *mdev)
2038{
2039 struct p_protocol *p;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002040 int size, cf, rv;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002041
2042 size = sizeof(struct p_protocol);
2043
2044 if (mdev->agreed_pro_version >= 87)
2045 size += strlen(mdev->net_conf->integrity_alg) + 1;
2046
2047 /* we must not recurse into our own queue,
2048 * as that is blocked during handshake */
2049 p = kmalloc(size, GFP_NOIO);
2050 if (p == NULL)
2051 return 0;
2052
2053 p->protocol = cpu_to_be32(mdev->net_conf->wire_protocol);
2054 p->after_sb_0p = cpu_to_be32(mdev->net_conf->after_sb_0p);
2055 p->after_sb_1p = cpu_to_be32(mdev->net_conf->after_sb_1p);
2056 p->after_sb_2p = cpu_to_be32(mdev->net_conf->after_sb_2p);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002057 p->two_primaries = cpu_to_be32(mdev->net_conf->two_primaries);
2058
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002059 cf = 0;
2060 if (mdev->net_conf->want_lose)
2061 cf |= CF_WANT_LOSE;
2062 if (mdev->net_conf->dry_run) {
2063 if (mdev->agreed_pro_version >= 92)
2064 cf |= CF_DRY_RUN;
2065 else {
2066 dev_err(DEV, "--dry-run is not supported by peer");
Dan Carpenter7ac314c2010-04-22 14:27:23 +02002067 kfree(p);
Philipp Reisner148efa12011-01-15 00:21:15 +01002068 return -1;
Philipp Reisnercf14c2e2010-02-02 21:03:50 +01002069 }
2070 }
2071 p->conn_flags = cpu_to_be32(cf);
2072
Philipp Reisnerb411b362009-09-25 16:07:19 -07002073 if (mdev->agreed_pro_version >= 87)
2074 strcpy(p->integrity_alg, mdev->net_conf->integrity_alg);
2075
2076 rv = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_PROTOCOL,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002077 (struct p_header80 *)p, size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002078 kfree(p);
2079 return rv;
2080}
2081
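/* Send our UUID set plus the number of bits currently set in the bitmap.
 * The extra uuid_flags end up in the UI_FLAGS slot: 1 if want_lose
 * ("discard my data") is set, 2 if we were a crashed primary, 4 if
 * new_state_tmp.disk is D_INCONSISTENT; drbd_send_uuids_skip_initial_sync()
 * additionally passes 8. */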
2082int _drbd_send_uuids(struct drbd_conf *mdev, u64 uuid_flags)
2083{
2084 struct p_uuids p;
2085 int i;
2086
2087 if (!get_ldev_if_state(mdev, D_NEGOTIATING))
2088 return 1;
2089
2090 for (i = UI_CURRENT; i < UI_SIZE; i++)
2091 p.uuid[i] = mdev->ldev ? cpu_to_be64(mdev->ldev->md.uuid[i]) : 0;
2092
2093 mdev->comm_bm_set = drbd_bm_total_weight(mdev);
2094 p.uuid[UI_SIZE] = cpu_to_be64(mdev->comm_bm_set);
2095 uuid_flags |= mdev->net_conf->want_lose ? 1 : 0;
2096 uuid_flags |= test_bit(CRASHED_PRIMARY, &mdev->flags) ? 2 : 0;
2097 uuid_flags |= mdev->new_state_tmp.disk == D_INCONSISTENT ? 4 : 0;
2098 p.uuid[UI_FLAGS] = cpu_to_be64(uuid_flags);
2099
2100 put_ldev(mdev);
2101
2102 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_UUIDS,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002103 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002104}
2105
2106int drbd_send_uuids(struct drbd_conf *mdev)
2107{
2108 return _drbd_send_uuids(mdev, 0);
2109}
2110
2111int drbd_send_uuids_skip_initial_sync(struct drbd_conf *mdev)
2112{
2113 return _drbd_send_uuids(mdev, 8);
2114}
2115
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002116void drbd_print_uuids(struct drbd_conf *mdev, const char *text)
2117{
2118 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2119 u64 *uuid = mdev->ldev->md.uuid;
2120 dev_info(DEV, "%s %016llX:%016llX:%016llX:%016llX\n",
2121 text,
2122 (unsigned long long)uuid[UI_CURRENT],
2123 (unsigned long long)uuid[UI_BITMAP],
2124 (unsigned long long)uuid[UI_HISTORY_START],
2125 (unsigned long long)uuid[UI_HISTORY_END]);
2126 put_ldev(mdev);
2127 } else {
2128 dev_info(DEV, "%s effective data uuid: %016llX\n",
2129 text,
2130 (unsigned long long)mdev->ed_uuid);
2131 }
2132}
2133
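/* Generate a new sync UUID in the UI_BITMAP slot (bump the existing one by
 * UUID_NEW_BM_OFFSET, or pick a random value if there is none yet), write it
 * to the meta data, and send it to the peer. */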
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002134int drbd_gen_and_send_sync_uuid(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002135{
2136 struct p_rs_uuid p;
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002137 u64 uuid;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002138
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002139 D_ASSERT(mdev->state.disk == D_UP_TO_DATE);
2140
Philipp Reisner5ba3dac2011-10-05 15:54:18 +02002141 uuid = mdev->ldev->md.uuid[UI_BITMAP];
2142 if (uuid && uuid != UUID_JUST_CREATED)
2143 uuid = uuid + UUID_NEW_BM_OFFSET;
2144 else
2145 get_random_bytes(&uuid, sizeof(u64));
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002146 drbd_uuid_set(mdev, UI_BITMAP, uuid);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01002147 drbd_print_uuids(mdev, "updated sync UUID");
Lars Ellenberg5a22db82010-12-17 21:14:23 +01002148 drbd_md_sync(mdev);
2149 p.uuid = cpu_to_be64(uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002150
2151 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SYNC_UUID,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002152 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002153}
2154
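/* Tell the peer about our disk geometry: backing device capacity, the
 * configured user size, our current capacity (or 0 if trigger_reply is set),
 * the queue order type and the maximum BIO size we accept; the latter is
 * capped for peers with apv <= 94. */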
Philipp Reisnere89b5912010-03-24 17:11:33 +01002155int drbd_send_sizes(struct drbd_conf *mdev, int trigger_reply, enum dds_flags flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002156{
2157 struct p_sizes p;
2158 sector_t d_size, u_size;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002159 int q_order_type, max_bio_size;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002160 int ok;
2161
2162 if (get_ldev_if_state(mdev, D_NEGOTIATING)) {
2163 D_ASSERT(mdev->ldev->backing_bdev);
2164 d_size = drbd_get_max_capacity(mdev->ldev);
2165 u_size = mdev->ldev->dc.disk_size;
2166 q_order_type = drbd_queue_order_type(mdev);
Philipp Reisner99432fc2011-05-20 16:39:13 +02002167 max_bio_size = queue_max_hw_sectors(mdev->ldev->backing_bdev->bd_disk->queue) << 9;
2168 max_bio_size = min_t(int, max_bio_size, DRBD_MAX_BIO_SIZE);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002169 put_ldev(mdev);
2170 } else {
2171 d_size = 0;
2172 u_size = 0;
2173 q_order_type = QUEUE_ORDERED_NONE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02002174 max_bio_size = DRBD_MAX_BIO_SIZE; /* ... multiple BIOs per peer_request */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002175 }
2176
Philipp Reisner68093842011-06-30 15:43:06 +02002177 /* Never allow old drbd (up to 8.3.7) to see more than 32KiB */
2178 if (mdev->agreed_pro_version <= 94)
2179 max_bio_size = min_t(int, max_bio_size, DRBD_MAX_SIZE_H80_PACKET);
2180
Philipp Reisnerb411b362009-09-25 16:07:19 -07002181 p.d_size = cpu_to_be64(d_size);
2182 p.u_size = cpu_to_be64(u_size);
2183 p.c_size = cpu_to_be64(trigger_reply ? 0 : drbd_get_capacity(mdev->this_bdev));
Philipp Reisner99432fc2011-05-20 16:39:13 +02002184 p.max_bio_size = cpu_to_be32(max_bio_size);
Philipp Reisnere89b5912010-03-24 17:11:33 +01002185 p.queue_order_type = cpu_to_be16(q_order_type);
2186 p.dds_flags = cpu_to_be16(flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002187
2188 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_SIZES,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002189 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002190 return ok;
2191}
2192
2193/**
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002194 * drbd_send_current_state() - Sends the drbd state to the peer
Philipp Reisnerb411b362009-09-25 16:07:19 -07002195 * @mdev: DRBD device.
2196 */
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002197int drbd_send_current_state(struct drbd_conf *mdev)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002198{
2199 struct socket *sock;
2200 struct p_state p;
2201 int ok = 0;
2202
2203	 /* Grab state lock so we won't send state if we're in the middle
2204	 * of a cluster-wide state change on another thread */
2205 drbd_state_lock(mdev);
2206
2207 mutex_lock(&mdev->data.mutex);
2208
2209 p.state = cpu_to_be32(mdev->state.i); /* Within the send mutex */
2210 sock = mdev->data.socket;
2211
2212 if (likely(sock != NULL)) {
2213 ok = _drbd_send_cmd(mdev, sock, P_STATE,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002214 (struct p_header80 *)&p, sizeof(p), 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002215 }
2216
2217 mutex_unlock(&mdev->data.mutex);
2218
2219 drbd_state_unlock(mdev);
2220 return ok;
2221}
2222
Lars Ellenbergf479ea02011-10-27 16:52:30 +02002223/**
2224 * drbd_send_state() - After a state change, sends the new state to the peer
2225 * @mdev: DRBD device.
2226 * @state: the state to send, not necessarily the current state.
2227 *
2228 * Each state change queues an "after_state_ch" work, which will eventually
2229 * send the resulting new state to the peer. If more state changes happen
2230 * between queuing and processing of the after_state_ch work, we still
2231 * want to send each intermediary state in the order it occurred.
2232 */
2233int drbd_send_state(struct drbd_conf *mdev, union drbd_state state)
2234{
2235 struct socket *sock;
2236 struct p_state p;
2237 int ok = 0;
2238
2239 mutex_lock(&mdev->data.mutex);
2240
2241 p.state = cpu_to_be32(state.i);
2242 sock = mdev->data.socket;
2243
2244 if (likely(sock != NULL)) {
2245 ok = _drbd_send_cmd(mdev, sock, P_STATE,
2246 (struct p_header80 *)&p, sizeof(p), 0);
2247 }
2248
2249 mutex_unlock(&mdev->data.mutex);
2250
2251 return ok;
2252}
2253
Philipp Reisnerb411b362009-09-25 16:07:19 -07002254int drbd_send_state_req(struct drbd_conf *mdev,
2255 union drbd_state mask, union drbd_state val)
2256{
2257 struct p_req_state p;
2258
2259 p.mask = cpu_to_be32(mask.i);
2260 p.val = cpu_to_be32(val.i);
2261
2262 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_STATE_CHG_REQ,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002263 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002264}
2265
Andreas Gruenbacherbf885f82010-12-08 00:39:32 +01002266int drbd_send_sr_reply(struct drbd_conf *mdev, enum drbd_state_rv retcode)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002267{
2268 struct p_req_state_reply p;
2269
2270 p.retcode = cpu_to_be32(retcode);
2271
2272 return drbd_send_cmd(mdev, USE_META_SOCKET, P_STATE_CHG_REPLY,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002273 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002274}
2275
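/* Compress (a chunk of) the bitmap into p->code: the bitmap is viewed as
 * alternating runs of set and cleared bits, each run length is encoded with
 * the variable length integer code from drbd_vli.h, and only the length of
 * the first run plus a "first bit was set" flag need to be stored explicitly.
 * Returns the number of code bytes, 0 if RLE may not be used, there is
 * nothing to do, or the chunk did not compress, and -1 on error. */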
2276int fill_bitmap_rle_bits(struct drbd_conf *mdev,
2277 struct p_compressed_bm *p,
2278 struct bm_xfer_ctx *c)
2279{
2280 struct bitstream bs;
2281 unsigned long plain_bits;
2282 unsigned long tmp;
2283 unsigned long rl;
2284 unsigned len;
2285 unsigned toggle;
2286 int bits;
2287
2288 /* may we use this feature? */
2289 if ((mdev->sync_conf.use_rle == 0) ||
2290 (mdev->agreed_pro_version < 90))
2291 return 0;
2292
2293 if (c->bit_offset >= c->bm_bits)
2294 return 0; /* nothing to do. */
2295
2296 /* use at most thus many bytes */
2297 bitstream_init(&bs, p->code, BM_PACKET_VLI_BYTES_MAX, 0);
2298 memset(p->code, 0, BM_PACKET_VLI_BYTES_MAX);
2299 /* plain bits covered in this code string */
2300 plain_bits = 0;
2301
2302 /* p->encoding & 0x80 stores whether the first run length is set.
2303 * bit offset is implicit.
2304 * start with toggle == 2 to be able to tell the first iteration */
2305 toggle = 2;
2306
2307	 /* see how many plain bits we can stuff into one packet
2308 * using RLE and VLI. */
2309 do {
2310 tmp = (toggle == 0) ? _drbd_bm_find_next_zero(mdev, c->bit_offset)
2311 : _drbd_bm_find_next(mdev, c->bit_offset);
2312 if (tmp == -1UL)
2313 tmp = c->bm_bits;
2314 rl = tmp - c->bit_offset;
2315
2316 if (toggle == 2) { /* first iteration */
2317 if (rl == 0) {
2318 /* the first checked bit was set,
2319 * store start value, */
2320 DCBP_set_start(p, 1);
2321 /* but skip encoding of zero run length */
2322 toggle = !toggle;
2323 continue;
2324 }
2325 DCBP_set_start(p, 0);
2326 }
2327
2328 /* paranoia: catch zero runlength.
2329 * can only happen if bitmap is modified while we scan it. */
2330 if (rl == 0) {
2331 dev_err(DEV, "unexpected zero runlength while encoding bitmap "
2332 "t:%u bo:%lu\n", toggle, c->bit_offset);
2333 return -1;
2334 }
2335
2336 bits = vli_encode_bits(&bs, rl);
2337 if (bits == -ENOBUFS) /* buffer full */
2338 break;
2339 if (bits <= 0) {
2340 dev_err(DEV, "error while encoding bitmap: %d\n", bits);
2341 return 0;
2342 }
2343
2344 toggle = !toggle;
2345 plain_bits += rl;
2346 c->bit_offset = tmp;
2347 } while (c->bit_offset < c->bm_bits);
2348
2349 len = bs.cur.b - p->code + !!bs.cur.bit;
2350
2351 if (plain_bits < (len << 3)) {
2352 /* incompressible with this method.
2353 * we need to rewind both word and bit position. */
2354 c->bit_offset -= plain_bits;
2355 bm_xfer_ctx_bit_to_word_offset(c);
2356 c->bit_offset = c->word_offset * BITS_PER_LONG;
2357 return 0;
2358 }
2359
2360 /* RLE + VLI was able to compress it just fine.
2361 * update c->word_offset. */
2362 bm_xfer_ctx_bit_to_word_offset(c);
2363
2364 /* store pad_bits */
2365 DCBP_set_pad_bits(p, (8 - bs.cur.bit) & 0x7);
2366
2367 return len;
2368}
2369
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002370/**
2371 * send_bitmap_rle_or_plain
2372 *
2373 * Return 0 when done, 1 when another iteration is needed, and a negative error
2374 * code upon failure.
2375 */
2376static int
Philipp Reisnerb411b362009-09-25 16:07:19 -07002377send_bitmap_rle_or_plain(struct drbd_conf *mdev,
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002378 struct p_header80 *h, struct bm_xfer_ctx *c)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002379{
2380 struct p_compressed_bm *p = (void*)h;
2381 unsigned long num_words;
2382 int len;
2383 int ok;
2384
2385 len = fill_bitmap_rle_bits(mdev, p, c);
2386
2387 if (len < 0)
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002388 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002389
2390 if (len) {
2391 DCBP_set_code(p, RLE_VLI_Bits);
2392 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_COMPRESSED_BITMAP, h,
2393 sizeof(*p) + len, 0);
2394
2395 c->packets[0]++;
2396 c->bytes[0] += sizeof(*p) + len;
2397
2398 if (c->bit_offset >= c->bm_bits)
2399 len = 0; /* DONE */
2400 } else {
2401 /* was not compressible.
2402 * send a buffer full of plain text bits instead. */
2403 num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset);
2404 len = num_words * sizeof(long);
2405 if (len)
2406 drbd_bm_get_lel(mdev, c->word_offset, num_words, (unsigned long*)h->payload);
2407 ok = _drbd_send_cmd(mdev, mdev->data.socket, P_BITMAP,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002408 h, sizeof(struct p_header80) + len, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002409 c->word_offset += num_words;
2410 c->bit_offset = c->word_offset * BITS_PER_LONG;
2411
2412 c->packets[1]++;
Philipp Reisner0b70a132010-08-20 13:36:10 +02002413 c->bytes[1] += sizeof(struct p_header80) + len;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002414
2415 if (c->bit_offset > c->bm_bits)
2416 c->bit_offset = c->bm_bits;
2417 }
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002418 if (ok) {
2419 if (len == 0) {
2420 INFO_bm_xfer_stats(mdev, "send", c);
2421 return 0;
2422 } else
2423 return 1;
2424 }
2425 return -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002426}
2427
2428/* See the comment at receive_bitmap() */
2429int _drbd_send_bitmap(struct drbd_conf *mdev)
2430{
2431 struct bm_xfer_ctx c;
Philipp Reisner0b70a132010-08-20 13:36:10 +02002432 struct p_header80 *p;
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002433 int err;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002434
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002435 ERR_IF(!mdev->bitmap) return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002436
2437 /* maybe we should use some per thread scratch page,
2438 * and allocate that during initial device creation? */
Philipp Reisner0b70a132010-08-20 13:36:10 +02002439 p = (struct p_header80 *) __get_free_page(GFP_NOIO);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002440 if (!p) {
2441 dev_err(DEV, "failed to allocate one page buffer in %s\n", __func__);
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002442 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002443 }
2444
2445 if (get_ldev(mdev)) {
2446 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
2447 dev_info(DEV, "Writing the whole bitmap, MDF_FullSync was set.\n");
2448 drbd_bm_set_all(mdev);
2449 if (drbd_bm_write(mdev)) {
2450 /* write_bm did fail! Leave full sync flag set in Meta P_DATA
2451 * but otherwise process as per normal - need to tell other
2452 * side that a full resync is required! */
2453 dev_err(DEV, "Failed to write bitmap to disk!\n");
2454 } else {
2455 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
2456 drbd_md_sync(mdev);
2457 }
2458 }
2459 put_ldev(mdev);
2460 }
2461
2462 c = (struct bm_xfer_ctx) {
2463 .bm_bits = drbd_bm_bits(mdev),
2464 .bm_words = drbd_bm_words(mdev),
2465 };
2466
2467 do {
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002468 err = send_bitmap_rle_or_plain(mdev, p, &c);
2469 } while (err > 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002470
2471 free_page((unsigned long) p);
Andreas Gruenbacherf70af112010-12-11 18:51:50 +01002472 return err == 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002473}
2474
2475int drbd_send_bitmap(struct drbd_conf *mdev)
2476{
2477 int err;
2478
2479 if (!drbd_get_data_sock(mdev))
2480 return -1;
2481 err = !_drbd_send_bitmap(mdev);
2482 drbd_put_data_sock(mdev);
2483 return err;
2484}
2485
2486int drbd_send_b_ack(struct drbd_conf *mdev, u32 barrier_nr, u32 set_size)
2487{
2488 int ok;
2489 struct p_barrier_ack p;
2490
2491 p.barrier = barrier_nr;
2492 p.set_size = cpu_to_be32(set_size);
2493
2494 if (mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002495 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002496 ok = drbd_send_cmd(mdev, USE_META_SOCKET, P_BARRIER_ACK,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002497 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002498 return ok;
2499}
2500
2501/**
2502 * _drbd_send_ack() - Sends an ack packet
2503 * @mdev: DRBD device.
2504 * @cmd: Packet command code.
2505 * @sector: sector, needs to be in big endian byte order
2506 * @blksize: size in byte, needs to be in big endian byte order
2507 * @block_id: Id, big endian byte order
2508 */
2509static int _drbd_send_ack(struct drbd_conf *mdev, enum drbd_packets cmd,
2510 u64 sector,
2511 u32 blksize,
2512 u64 block_id)
2513{
2514 int ok;
2515 struct p_block_ack p;
2516
2517 p.sector = sector;
2518 p.block_id = block_id;
2519 p.blksize = blksize;
2520 p.seq_num = cpu_to_be32(atomic_add_return(1, &mdev->packet_seq));
2521
2522 if (!mdev->meta.socket || mdev->state.conn < C_CONNECTED)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002523 return false;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002524 ok = drbd_send_cmd(mdev, USE_META_SOCKET, cmd,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002525 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002526 return ok;
2527}
2528
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002529/* dp->sector and dp->block_id already/still in network byte order,
2530 * data_size is payload size according to dp->head,
2531 * and may need to be corrected for digest size. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002532int drbd_send_ack_dp(struct drbd_conf *mdev, enum drbd_packets cmd,
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002533 struct p_data *dp, int data_size)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002534{
Lars Ellenberg2b2bf212010-10-06 11:46:55 +02002535 data_size -= (mdev->agreed_pro_version >= 87 && mdev->integrity_r_tfm) ?
2536 crypto_hash_digestsize(mdev->integrity_r_tfm) : 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002537 return _drbd_send_ack(mdev, cmd, dp->sector, cpu_to_be32(data_size),
2538 dp->block_id);
2539}
2540
2541int drbd_send_ack_rp(struct drbd_conf *mdev, enum drbd_packets cmd,
2542 struct p_block_req *rp)
2543{
2544 return _drbd_send_ack(mdev, cmd, rp->sector, rp->blksize, rp->block_id);
2545}
2546
2547/**
2548 * drbd_send_ack() - Sends an ack packet
2549 * @mdev: DRBD device.
2550 * @cmd: Packet command code.
2551 * @e: Epoch entry.
2552 */
2553int drbd_send_ack(struct drbd_conf *mdev,
2554 enum drbd_packets cmd, struct drbd_epoch_entry *e)
2555{
2556 return _drbd_send_ack(mdev, cmd,
2557 cpu_to_be64(e->sector),
2558 cpu_to_be32(e->size),
2559 e->block_id);
2560}
2561
2562/* This function misuses the block_id field to signal if the blocks
2563 * are in sync or not. */
2564int drbd_send_ack_ex(struct drbd_conf *mdev, enum drbd_packets cmd,
2565 sector_t sector, int blksize, u64 block_id)
2566{
2567 return _drbd_send_ack(mdev, cmd,
2568 cpu_to_be64(sector),
2569 cpu_to_be32(blksize),
2570 cpu_to_be64(block_id));
2571}
2572
2573int drbd_send_drequest(struct drbd_conf *mdev, int cmd,
2574 sector_t sector, int size, u64 block_id)
2575{
2576 int ok;
2577 struct p_block_req p;
2578
2579 p.sector = cpu_to_be64(sector);
2580 p.block_id = block_id;
2581 p.blksize = cpu_to_be32(size);
2582
2583 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, cmd,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002584 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002585 return ok;
2586}
2587
2588int drbd_send_drequest_csum(struct drbd_conf *mdev,
2589 sector_t sector, int size,
2590 void *digest, int digest_size,
2591 enum drbd_packets cmd)
2592{
2593 int ok;
2594 struct p_block_req p;
2595
2596 p.sector = cpu_to_be64(sector);
2597 p.block_id = BE_DRBD_MAGIC + 0xbeef;
2598 p.blksize = cpu_to_be32(size);
2599
2600 p.head.magic = BE_DRBD_MAGIC;
2601 p.head.command = cpu_to_be16(cmd);
Philipp Reisner0b70a132010-08-20 13:36:10 +02002602 p.head.length = cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + digest_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002603
2604 mutex_lock(&mdev->data.mutex);
2605
2606 ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), 0));
2607 ok = ok && (digest_size == drbd_send(mdev, mdev->data.socket, digest, digest_size, 0));
2608
2609 mutex_unlock(&mdev->data.mutex);
2610
2611 return ok;
2612}
2613
2614int drbd_send_ov_request(struct drbd_conf *mdev, sector_t sector, int size)
2615{
2616 int ok;
2617 struct p_block_req p;
2618
2619 p.sector = cpu_to_be64(sector);
2620 p.block_id = BE_DRBD_MAGIC + 0xbabe;
2621 p.blksize = cpu_to_be32(size);
2622
2623 ok = drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OV_REQUEST,
Philipp Reisner0b70a132010-08-20 13:36:10 +02002624 (struct p_header80 *)&p, sizeof(p));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002625 return ok;
2626}
2627
2628/* called on sndtimeo
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002629 * returns false if we should retry,
2630 * true if we think connection is dead
Philipp Reisnerb411b362009-09-25 16:07:19 -07002631 */
2632static int we_should_drop_the_connection(struct drbd_conf *mdev, struct socket *sock)
2633{
2634 int drop_it;
2635 /* long elapsed = (long)(jiffies - mdev->last_received); */
2636
2637 drop_it = mdev->meta.socket == sock
2638 || !mdev->asender.task
2639 || get_t_state(&mdev->asender) != Running
2640 || mdev->state.conn < C_CONNECTED;
2641
2642 if (drop_it)
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01002643 return true;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002644
2645 drop_it = !--mdev->ko_count;
2646 if (!drop_it) {
2647 dev_err(DEV, "[%s/%d] sock_sendmsg time expired, ko = %u\n",
2648 current->comm, current->pid, mdev->ko_count);
2649 request_ping(mdev);
2650 }
2651
2652 return drop_it; /* && (mdev->state == R_PRIMARY) */;
2653}
2654
2655/* The idea of sendpage seems to be to put some kind of reference
2656 * to the page into the skb, and to hand it over to the NIC. In
2657 * this process get_page() gets called.
2658 *
2659 * As soon as the page was really sent over the network put_page()
2660 * gets called by some part of the network layer. [ NIC driver? ]
2661 *
2662 * [ get_page() / put_page() increment/decrement the count. If count
2663 * reaches 0 the page will be freed. ]
2664 *
2665 * This works nicely with pages from FSs.
2666 * But this means that in protocol A we might signal IO completion too early!
2667 *
2668 * In order not to corrupt data during a resync we must make sure
2669 * that we do not reuse our own buffer pages (EEs) too early, therefore
2670 * we have the net_ee list.
2671 *
2672 * XFS seems to have problems, still, it submits pages with page_count == 0!
2673 * As a workaround, we disable sendpage on pages
2674 * with page_count == 0 or PageSlab.
2675 */
2676static int _drbd_no_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002677 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002678{
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002679 int sent = drbd_send(mdev, mdev->data.socket, kmap(page) + offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002680 kunmap(page);
2681 if (sent == size)
2682 mdev->send_cnt += size>>9;
2683 return sent == size;
2684}
2685
2686static int _drbd_send_page(struct drbd_conf *mdev, struct page *page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002687 int offset, size_t size, unsigned msg_flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002688{
2689 mm_segment_t oldfs = get_fs();
2690 int sent, ok;
2691 int len = size;
2692
2693 /* e.g. XFS meta- & log-data is in slab pages, which have a
2694 * page_count of 0 and/or have PageSlab() set.
2695 * we cannot use send_page for those, as that does get_page();
2696 * put_page(); and would cause either a VM_BUG directly, or
2697 * __page_cache_release a page that would actually still be referenced
2698 * by someone, leading to some obscure delayed Oops somewhere else. */
2699 if (disable_sendpage || (page_count(page) < 1) || PageSlab(page))
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002700 return _drbd_no_send_page(mdev, page, offset, size, msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002701
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002702 msg_flags |= MSG_NOSIGNAL;
Philipp Reisnerb411b362009-09-25 16:07:19 -07002703 drbd_update_congested(mdev);
2704 set_fs(KERNEL_DS);
2705 do {
2706 sent = mdev->data.socket->ops->sendpage(mdev->data.socket, page,
2707 offset, len,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002708 msg_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002709 if (sent == -EAGAIN) {
2710 if (we_should_drop_the_connection(mdev,
2711 mdev->data.socket))
2712 break;
2713 else
2714 continue;
2715 }
2716 if (sent <= 0) {
2717 dev_warn(DEV, "%s: size=%d len=%d sent=%d\n",
2718 __func__, (int)size, len, sent);
2719 break;
2720 }
2721 len -= sent;
2722 offset += sent;
2723 } while (len > 0 /* THINK && mdev->cstate >= C_CONNECTED*/);
2724 set_fs(oldfs);
2725 clear_bit(NET_CONGESTED, &mdev->flags);
2726
2727 ok = (len == 0);
2728 if (likely(ok))
2729 mdev->send_cnt += size>>9;
2730 return ok;
2731}
2732
2733static int _drbd_send_bio(struct drbd_conf *mdev, struct bio *bio)
2734{
2735 struct bio_vec *bvec;
2736 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002737 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002738 __bio_for_each_segment(bvec, bio, i, 0) {
2739 if (!_drbd_no_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002740 bvec->bv_offset, bvec->bv_len,
2741 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002742 return 0;
2743 }
2744 return 1;
2745}
2746
2747static int _drbd_send_zc_bio(struct drbd_conf *mdev, struct bio *bio)
2748{
2749 struct bio_vec *bvec;
2750 int i;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002751 /* hint all but last page with MSG_MORE */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002752 __bio_for_each_segment(bvec, bio, i, 0) {
2753 if (!_drbd_send_page(mdev, bvec->bv_page,
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002754 bvec->bv_offset, bvec->bv_len,
2755 i == bio->bi_vcnt -1 ? 0 : MSG_MORE))
Philipp Reisnerb411b362009-09-25 16:07:19 -07002756 return 0;
2757 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002758 return 1;
2759}
2760
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002761static int _drbd_send_zc_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e)
2762{
2763 struct page *page = e->pages;
2764 unsigned len = e->size;
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002765 /* hint all but last page with MSG_MORE */
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002766 page_chain_for_each(page) {
2767 unsigned l = min_t(unsigned, len, PAGE_SIZE);
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002768 if (!_drbd_send_page(mdev, page, 0, l,
2769 page_chain_next(page) ? MSG_MORE : 0))
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002770 return 0;
2771 len -= l;
2772 }
2773 return 1;
2774}
2775
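/* Translate the relevant bio rw flags (REQ_SYNC, REQ_FUA, REQ_FLUSH,
 * REQ_DISCARD) into the DP_* flags of the data packet; peers with
 * apv < 95 only get DP_RW_SYNC. */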
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002776static u32 bio_flags_to_wire(struct drbd_conf *mdev, unsigned long bi_rw)
2777{
2778 if (mdev->agreed_pro_version >= 95)
2779 return (bi_rw & REQ_SYNC ? DP_RW_SYNC : 0) |
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002780 (bi_rw & REQ_FUA ? DP_FUA : 0) |
2781 (bi_rw & REQ_FLUSH ? DP_FLUSH : 0) |
2782 (bi_rw & REQ_DISCARD ? DP_DISCARD : 0);
2783 else
Jens Axboe721a9602011-03-09 11:56:30 +01002784 return bi_rw & REQ_SYNC ? DP_RW_SYNC : 0;
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002785}
2786
Philipp Reisnerb411b362009-09-25 16:07:19 -07002787/* Used to send write requests
2788 * R_PRIMARY -> Peer (P_DATA)
2789 */
2790int drbd_send_dblock(struct drbd_conf *mdev, struct drbd_request *req)
2791{
2792 int ok = 1;
2793 struct p_data p;
2794 unsigned int dp_flags = 0;
2795 void *dgb;
2796 int dgs;
2797
2798 if (!drbd_get_data_sock(mdev))
2799 return 0;
2800
2801 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2802 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2803
Philipp Reisnerd5373382010-08-23 15:18:33 +02002804 if (req->size <= DRBD_MAX_SIZE_H80_PACKET) {
Philipp Reisner0b70a132010-08-20 13:36:10 +02002805 p.head.h80.magic = BE_DRBD_MAGIC;
2806 p.head.h80.command = cpu_to_be16(P_DATA);
2807 p.head.h80.length =
2808 cpu_to_be16(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2809 } else {
2810 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2811 p.head.h95.command = cpu_to_be16(P_DATA);
2812 p.head.h95.length =
2813 cpu_to_be32(sizeof(p) - sizeof(union p_header) + dgs + req->size);
2814 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002815
2816 p.sector = cpu_to_be64(req->sector);
2817 p.block_id = (unsigned long)req;
2818 p.seq_num = cpu_to_be32(req->seq_num =
2819 atomic_add_return(1, &mdev->packet_seq));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002820
Philipp Reisner76d2e7e2010-08-25 11:58:05 +02002821 dp_flags = bio_flags_to_wire(mdev, req->master_bio->bi_rw);
2822
Philipp Reisnerb411b362009-09-25 16:07:19 -07002823 if (mdev->state.conn >= C_SYNC_SOURCE &&
2824 mdev->state.conn <= C_PAUSED_SYNC_T)
2825 dp_flags |= DP_MAY_SET_IN_SYNC;
2826
2827 p.dp_flags = cpu_to_be32(dp_flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002828 set_bit(UNPLUG_REMOTE, &mdev->flags);
2829 ok = (sizeof(p) ==
Lars Ellenbergba11ad92010-05-25 16:26:16 +02002830 drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0));
Philipp Reisnerb411b362009-09-25 16:07:19 -07002831 if (ok && dgs) {
2832 dgb = mdev->int_dig_out;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002833 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, dgb);
Andreas Gruenbachercab2f742010-12-09 16:08:46 +01002834 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002835 }
2836 if (ok) {
Lars Ellenberg470be442010-11-10 10:36:52 +01002837 /* For protocol A, we have to memcpy the payload into
2838 * socket buffers, as we may complete right away
2839 * as soon as we handed it over to tcp, at which point the data
2840 * pages may become invalid.
2841 *
2842 * For data-integrity enabled, we copy it as well, so we can be
2843 * sure that even if the bio pages may still be modified, it
2844 * won't change the data on the wire, thus if the digest checks
2845 * out ok after sending on this side, but does not fit on the
2846 * receiving side, we sure have detected corruption elsewhere.
2847 */
2848 if (mdev->net_conf->wire_protocol == DRBD_PROT_A || dgs)
Philipp Reisnerb411b362009-09-25 16:07:19 -07002849 ok = _drbd_send_bio(mdev, req->master_bio);
2850 else
2851 ok = _drbd_send_zc_bio(mdev, req->master_bio);
Lars Ellenberg470be442010-11-10 10:36:52 +01002852
2853 /* double check digest, sometimes buffers have been modified in flight. */
2854 if (dgs > 0 && dgs <= 64) {
Bart Van Assche24c48302011-05-21 18:32:29 +02002855 /* 64 byte, 512 bit, is the largest digest size
Lars Ellenberg470be442010-11-10 10:36:52 +01002856 * currently supported in kernel crypto. */
2857 unsigned char digest[64];
2858 drbd_csum_bio(mdev, mdev->integrity_w_tfm, req->master_bio, digest);
2859 if (memcmp(mdev->int_dig_out, digest, dgs)) {
2860 dev_warn(DEV,
2861 "Digest mismatch, buffer modified by upper layers during write: %llus +%u\n",
2862 (unsigned long long)req->sector, req->size);
2863 }
2864 } /* else if (dgs > 64) {
2865 ... Be noisy about digest too large ...
2866 } */
Philipp Reisnerb411b362009-09-25 16:07:19 -07002867 }
2868
2869 drbd_put_data_sock(mdev);
Philipp Reisnerbd26bfc2010-05-04 12:33:58 +02002870
Philipp Reisnerb411b362009-09-25 16:07:19 -07002871 return ok;
2872}
2873
2874/* answer packet, used to send data back for read requests:
2875 * Peer -> (diskless) R_PRIMARY (P_DATA_REPLY)
2876 * C_SYNC_SOURCE -> C_SYNC_TARGET (P_RS_DATA_REPLY)
2877 */
2878int drbd_send_block(struct drbd_conf *mdev, enum drbd_packets cmd,
2879 struct drbd_epoch_entry *e)
2880{
2881 int ok;
2882 struct p_data p;
2883 void *dgb;
2884 int dgs;
2885
2886 dgs = (mdev->agreed_pro_version >= 87 && mdev->integrity_w_tfm) ?
2887 crypto_hash_digestsize(mdev->integrity_w_tfm) : 0;
2888
Philipp Reisnerd5373382010-08-23 15:18:33 +02002889 if (e->size <= DRBD_MAX_SIZE_H80_PACKET) {
Philipp Reisner0b70a132010-08-20 13:36:10 +02002890 p.head.h80.magic = BE_DRBD_MAGIC;
2891 p.head.h80.command = cpu_to_be16(cmd);
2892 p.head.h80.length =
2893 cpu_to_be16(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2894 } else {
2895 p.head.h95.magic = BE_DRBD_MAGIC_BIG;
2896 p.head.h95.command = cpu_to_be16(cmd);
2897 p.head.h95.length =
2898 cpu_to_be32(sizeof(p) - sizeof(struct p_header80) + dgs + e->size);
2899 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07002900
2901 p.sector = cpu_to_be64(e->sector);
2902 p.block_id = e->block_id;
2903 /* p.seq_num = 0; No sequence numbers here.. */
2904
2905 /* Only called by our kernel thread.
2906 * This one may be interrupted by DRBD_SIG and/or DRBD_SIGKILL
2907 * in response to admin command or module unload.
2908 */
2909 if (!drbd_get_data_sock(mdev))
2910 return 0;
2911
Philipp Reisner0b70a132010-08-20 13:36:10 +02002912 ok = sizeof(p) == drbd_send(mdev, mdev->data.socket, &p, sizeof(p), dgs ? MSG_MORE : 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002913 if (ok && dgs) {
2914 dgb = mdev->int_dig_out;
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002915 drbd_csum_ee(mdev, mdev->integrity_w_tfm, e, dgb);
Andreas Gruenbachercab2f742010-12-09 16:08:46 +01002916 ok = dgs == drbd_send(mdev, mdev->data.socket, dgb, dgs, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002917 }
2918 if (ok)
Lars Ellenberg45bb9122010-05-14 17:10:48 +02002919 ok = _drbd_send_zc_ee(mdev, e);
Philipp Reisnerb411b362009-09-25 16:07:19 -07002920
2921 drbd_put_data_sock(mdev);
Philipp Reisnerbd26bfc2010-05-04 12:33:58 +02002922
Philipp Reisnerb411b362009-09-25 16:07:19 -07002923 return ok;
2924}
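/*
 * Illustrative note, not part of the original driver code: a sketch of the
 * wire layout produced by the two send paths above, derived from how the
 * header length fields are filled in (sizeof(p) - sizeof(struct p_header80)
 * + dgs + size) and from the order of the drbd_send() calls:
 *
 *	| struct p_data header | dgs digest bytes (optional) | payload |
 *
 * i.e. the digest, when integrity_w_tfm is configured, travels between the
 * packet header and the bio/ee payload, which is why the header is sent
 * with MSG_MORE whenever dgs != 0.
 */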
2925
Philipp Reisner73a01a12010-10-27 14:33:00 +02002926int drbd_send_oos(struct drbd_conf *mdev, struct drbd_request *req)
2927{
2928 struct p_block_desc p;
2929
2930 p.sector = cpu_to_be64(req->sector);
2931 p.blksize = cpu_to_be32(req->size);
2932
2933 return drbd_send_cmd(mdev, USE_DATA_SOCKET, P_OUT_OF_SYNC, &p.head, sizeof(p));
2934}
2935
Philipp Reisnerb411b362009-09-25 16:07:19 -07002936/*
2937 drbd_send distinguishes two cases:
2938
2939 Packets sent via the data socket "sock"
2940 and packets sent via the meta data socket "msock"
2941
2942 sock msock
2943 -----------------+-------------------------+------------------------------
2944 timeout conf.timeout / 2 conf.timeout / 2
2945 timeout action send a ping via msock Abort communication
2946 and close all sockets
2947*/
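/*
 * Illustrative sketch only (an assumption drawn from the helpers in this
 * file, not authoritative documentation): bulk data packets go out over
 * "sock", the data socket, while small control packets use "msock".  A
 * typical data-socket caller, modeled on drbd_send_block() above, looks
 * roughly like:
 *
 *	if (!drbd_get_data_sock(mdev))
 *		return 0;
 *	ok = (sizeof(p) == drbd_send(mdev, mdev->data.socket,
 *				     &p, sizeof(p), 0));
 *	drbd_put_data_sock(mdev);
 *
 * i.e. the return value of drbd_send() is compared against the number of
 * bytes requested; a short send counts as failure.
 */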
2948
2949/*
2950 * you must have down()ed the appropriate [m]sock_mutex elsewhere!
2951 */
2952int drbd_send(struct drbd_conf *mdev, struct socket *sock,
2953 void *buf, size_t size, unsigned msg_flags)
2954{
2955 struct kvec iov;
2956 struct msghdr msg;
2957 int rv, sent = 0;
2958
2959 if (!sock)
2960 return -1000;
2961
2962 /* THINK if (signal_pending) return ... ? */
2963
2964 iov.iov_base = buf;
2965 iov.iov_len = size;
2966
2967 msg.msg_name = NULL;
2968 msg.msg_namelen = 0;
2969 msg.msg_control = NULL;
2970 msg.msg_controllen = 0;
2971 msg.msg_flags = msg_flags | MSG_NOSIGNAL;
2972
2973 if (sock == mdev->data.socket) {
2974 mdev->ko_count = mdev->net_conf->ko_count;
2975 drbd_update_congested(mdev);
2976 }
2977 do {
2978 /* STRANGE
2979 * tcp_sendmsg does _not_ use its size parameter at all ?
2980 *
2981 * -EAGAIN on timeout, -EINTR on signal.
2982 */
2983/* THINK
2984 * do we need to block DRBD_SIG if sock == &meta.socket ??
2985 * otherwise wake_asender() might interrupt some send_*Ack !
2986 */
2987 rv = kernel_sendmsg(sock, &msg, &iov, 1, size);
2988 if (rv == -EAGAIN) {
2989 if (we_should_drop_the_connection(mdev, sock))
2990 break;
2991 else
2992 continue;
2993 }
2994 D_ASSERT(rv != 0);
2995 if (rv == -EINTR) {
2996 flush_signals(current);
2997 rv = 0;
2998 }
2999 if (rv < 0)
3000 break;
3001 sent += rv;
3002 iov.iov_base += rv;
3003 iov.iov_len -= rv;
3004 } while (sent < size);
3005
3006 if (sock == mdev->data.socket)
3007 clear_bit(NET_CONGESTED, &mdev->flags);
3008
3009 if (rv <= 0) {
3010 if (rv != -EAGAIN) {
3011 dev_err(DEV, "%s_sendmsg returned %d\n",
3012 sock == mdev->meta.socket ? "msock" : "sock",
3013 rv);
3014 drbd_force_state(mdev, NS(conn, C_BROKEN_PIPE));
3015 } else
3016 drbd_force_state(mdev, NS(conn, C_TIMEOUT));
3017 }
3018
3019 return sent;
3020}
3021
3022static int drbd_open(struct block_device *bdev, fmode_t mode)
3023{
3024 struct drbd_conf *mdev = bdev->bd_disk->private_data;
3025 unsigned long flags;
3026 int rv = 0;
3027
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003028 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003029 spin_lock_irqsave(&mdev->req_lock, flags);
3030 /* to have a stable mdev->state.role
3031 * and no race with updating open_cnt */
3032
3033 if (mdev->state.role != R_PRIMARY) {
3034 if (mode & FMODE_WRITE)
3035 rv = -EROFS;
3036 else if (!allow_oos)
3037 rv = -EMEDIUMTYPE;
3038 }
3039
3040 if (!rv)
3041 mdev->open_cnt++;
3042 spin_unlock_irqrestore(&mdev->req_lock, flags);
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003043 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003044
3045 return rv;
3046}
3047
3048static int drbd_release(struct gendisk *gd, fmode_t mode)
3049{
3050 struct drbd_conf *mdev = gd->private_data;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003051 mutex_lock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003052 mdev->open_cnt--;
Arnd Bergmann2a48fc02010-06-02 14:28:52 +02003053 mutex_unlock(&drbd_main_mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003054 return 0;
3055}
3056
Philipp Reisnerb411b362009-09-25 16:07:19 -07003057static void drbd_set_defaults(struct drbd_conf *mdev)
3058{
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003059 /* This way we get a compile error when sync_conf grows,
 3060	 and we forget to initialize it here */
3061 mdev->sync_conf = (struct syncer_conf) {
3062 /* .rate = */ DRBD_RATE_DEF,
3063 /* .after = */ DRBD_AFTER_DEF,
3064 /* .al_extents = */ DRBD_AL_EXTENTS_DEF,
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003065 /* .verify_alg = */ {}, 0,
3066 /* .cpu_mask = */ {}, 0,
3067 /* .csums_alg = */ {}, 0,
Philipp Reisnere7564142010-06-29 17:35:34 +02003068 /* .use_rle = */ 0,
Philipp Reisner9a31d712010-07-05 13:42:03 +02003069 /* .on_no_data = */ DRBD_ON_NO_DATA_DEF,
3070 /* .c_plan_ahead = */ DRBD_C_PLAN_AHEAD_DEF,
3071 /* .c_delay_target = */ DRBD_C_DELAY_TARGET_DEF,
3072 /* .c_fill_target = */ DRBD_C_FILL_TARGET_DEF,
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003073 /* .c_max_rate = */ DRBD_C_MAX_RATE_DEF,
3074 /* .c_min_rate = */ DRBD_C_MIN_RATE_DEF
Philipp Reisner85f4cc12010-06-29 17:35:34 +02003075 };
3076
 3077	 /* Have to do it this way, because the layout differs between
3078 big endian and little endian */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003079 mdev->state = (union drbd_state) {
3080 { .role = R_SECONDARY,
3081 .peer = R_UNKNOWN,
3082 .conn = C_STANDALONE,
3083 .disk = D_DISKLESS,
3084 .pdsk = D_UNKNOWN,
Philipp Reisnerfb22c402010-09-08 23:20:21 +02003085 .susp = 0,
3086 .susp_nod = 0,
3087 .susp_fen = 0
Philipp Reisnerb411b362009-09-25 16:07:19 -07003088 } };
3089}
3090
3091void drbd_init_set_defaults(struct drbd_conf *mdev)
3092{
3093 /* the memset(,0,) did most of this.
3094 * note: only assignments, no allocation in here */
3095
3096 drbd_set_defaults(mdev);
3097
Philipp Reisnerb411b362009-09-25 16:07:19 -07003098 atomic_set(&mdev->ap_bio_cnt, 0);
3099 atomic_set(&mdev->ap_pending_cnt, 0);
3100 atomic_set(&mdev->rs_pending_cnt, 0);
3101 atomic_set(&mdev->unacked_cnt, 0);
3102 atomic_set(&mdev->local_cnt, 0);
3103 atomic_set(&mdev->net_cnt, 0);
3104 atomic_set(&mdev->packet_seq, 0);
3105 atomic_set(&mdev->pp_in_use, 0);
Lars Ellenberg435f0742010-09-06 12:30:25 +02003106 atomic_set(&mdev->pp_in_use_by_net, 0);
Philipp Reisner778f2712010-07-06 11:14:00 +02003107 atomic_set(&mdev->rs_sect_in, 0);
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003108 atomic_set(&mdev->rs_sect_ev, 0);
Philipp Reisner759fbdf2010-10-26 16:02:27 +02003109 atomic_set(&mdev->ap_in_flight, 0);
Philipp Reisnere1711732011-06-27 11:51:46 +02003110 atomic_set(&mdev->md_io_in_use, 0);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003111
Philipp Reisnerb411b362009-09-25 16:07:19 -07003112 mutex_init(&mdev->data.mutex);
3113 mutex_init(&mdev->meta.mutex);
3114 sema_init(&mdev->data.work.s, 0);
3115 sema_init(&mdev->meta.work.s, 0);
3116 mutex_init(&mdev->state_mutex);
3117
3118 spin_lock_init(&mdev->data.work.q_lock);
3119 spin_lock_init(&mdev->meta.work.q_lock);
3120
3121 spin_lock_init(&mdev->al_lock);
3122 spin_lock_init(&mdev->req_lock);
3123 spin_lock_init(&mdev->peer_seq_lock);
3124 spin_lock_init(&mdev->epoch_lock);
3125
3126 INIT_LIST_HEAD(&mdev->active_ee);
3127 INIT_LIST_HEAD(&mdev->sync_ee);
3128 INIT_LIST_HEAD(&mdev->done_ee);
3129 INIT_LIST_HEAD(&mdev->read_ee);
3130 INIT_LIST_HEAD(&mdev->net_ee);
3131 INIT_LIST_HEAD(&mdev->resync_reads);
3132 INIT_LIST_HEAD(&mdev->data.work.q);
3133 INIT_LIST_HEAD(&mdev->meta.work.q);
3134 INIT_LIST_HEAD(&mdev->resync_work.list);
3135 INIT_LIST_HEAD(&mdev->unplug_work.list);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003136 INIT_LIST_HEAD(&mdev->go_diskless.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003137 INIT_LIST_HEAD(&mdev->md_sync_work.list);
Philipp Reisnerc4752ef2010-10-27 17:32:36 +02003138 INIT_LIST_HEAD(&mdev->start_resync_work.list);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003139 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
Philipp Reisner0ced55a2010-04-30 15:26:20 +02003140
Philipp Reisner794abb72010-12-27 11:51:23 +01003141 mdev->resync_work.cb = w_resync_timer;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003142 mdev->unplug_work.cb = w_send_write_hint;
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003143 mdev->go_diskless.cb = w_go_diskless;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003144 mdev->md_sync_work.cb = w_md_sync;
3145 mdev->bm_io_work.w.cb = w_bitmap_io;
Philipp Reisner370a43e2011-01-14 16:03:11 +01003146 mdev->start_resync_work.cb = w_start_resync;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003147 init_timer(&mdev->resync_timer);
3148 init_timer(&mdev->md_sync_timer);
Philipp Reisner370a43e2011-01-14 16:03:11 +01003149 init_timer(&mdev->start_resync_timer);
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003150 init_timer(&mdev->request_timer);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003151 mdev->resync_timer.function = resync_timer_fn;
3152 mdev->resync_timer.data = (unsigned long) mdev;
3153 mdev->md_sync_timer.function = md_sync_timer_fn;
3154 mdev->md_sync_timer.data = (unsigned long) mdev;
Philipp Reisner370a43e2011-01-14 16:03:11 +01003155 mdev->start_resync_timer.function = start_resync_timer_fn;
3156 mdev->start_resync_timer.data = (unsigned long) mdev;
Philipp Reisner7fde2be2011-03-01 11:08:28 +01003157 mdev->request_timer.function = request_timer_fn;
3158 mdev->request_timer.data = (unsigned long) mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003159
3160 init_waitqueue_head(&mdev->misc_wait);
3161 init_waitqueue_head(&mdev->state_wait);
Philipp Reisner84dfb9f2010-06-23 11:20:05 +02003162 init_waitqueue_head(&mdev->net_cnt_wait);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003163 init_waitqueue_head(&mdev->ee_wait);
3164 init_waitqueue_head(&mdev->al_wait);
3165 init_waitqueue_head(&mdev->seq_wait);
3166
3167 drbd_thread_init(mdev, &mdev->receiver, drbdd_init);
3168 drbd_thread_init(mdev, &mdev->worker, drbd_worker);
3169 drbd_thread_init(mdev, &mdev->asender, drbd_asender);
3170
3171 mdev->agreed_pro_version = PRO_VERSION_MAX;
Philipp Reisner2451fc32010-08-24 13:43:11 +02003172 mdev->write_ordering = WO_bdev_flush;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003173 mdev->resync_wenr = LC_FREE;
Philipp Reisner99432fc2011-05-20 16:39:13 +02003174 mdev->peer_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
3175 mdev->local_max_bio_size = DRBD_MAX_BIO_SIZE_SAFE;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003176}
3177
3178void drbd_mdev_cleanup(struct drbd_conf *mdev)
3179{
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003180 int i;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003181 if (mdev->receiver.t_state != None)
3182 dev_err(DEV, "ASSERT FAILED: receiver t_state == %d expected 0.\n",
3183 mdev->receiver.t_state);
3184
3185 /* no need to lock it, I'm the only thread alive */
3186 if (atomic_read(&mdev->current_epoch->epoch_size) != 0)
3187 dev_err(DEV, "epoch_size:%d\n", atomic_read(&mdev->current_epoch->epoch_size));
3188 mdev->al_writ_cnt =
3189 mdev->bm_writ_cnt =
3190 mdev->read_cnt =
3191 mdev->recv_cnt =
3192 mdev->send_cnt =
3193 mdev->writ_cnt =
3194 mdev->p_size =
3195 mdev->rs_start =
3196 mdev->rs_total =
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003197 mdev->rs_failed = 0;
3198 mdev->rs_last_events = 0;
Lars Ellenberg0f0601f2010-08-11 23:40:24 +02003199 mdev->rs_last_sect_ev = 0;
Lars Ellenberg1d7734a2010-08-11 21:21:50 +02003200 for (i = 0; i < DRBD_SYNC_MARKS; i++) {
3201 mdev->rs_mark_left[i] = 0;
3202 mdev->rs_mark_time[i] = 0;
3203 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07003204 D_ASSERT(mdev->net_conf == NULL);
3205
3206 drbd_set_my_capacity(mdev, 0);
3207 if (mdev->bitmap) {
3208 /* maybe never allocated. */
Philipp Reisner02d9a942010-03-24 16:23:03 +01003209 drbd_bm_resize(mdev, 0, 1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003210 drbd_bm_cleanup(mdev);
3211 }
3212
3213 drbd_free_resources(mdev);
Philipp Reisner07782862010-08-31 12:00:50 +02003214 clear_bit(AL_SUSPENDED, &mdev->flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003215
3216 /*
 3217	 * currently we call drbd_init_ee only on module load, so
 3218	 * we may call drbd_release_ee only on module unload!
3219 */
3220 D_ASSERT(list_empty(&mdev->active_ee));
3221 D_ASSERT(list_empty(&mdev->sync_ee));
3222 D_ASSERT(list_empty(&mdev->done_ee));
3223 D_ASSERT(list_empty(&mdev->read_ee));
3224 D_ASSERT(list_empty(&mdev->net_ee));
3225 D_ASSERT(list_empty(&mdev->resync_reads));
3226 D_ASSERT(list_empty(&mdev->data.work.q));
3227 D_ASSERT(list_empty(&mdev->meta.work.q));
3228 D_ASSERT(list_empty(&mdev->resync_work.list));
3229 D_ASSERT(list_empty(&mdev->unplug_work.list));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02003230 D_ASSERT(list_empty(&mdev->go_diskless.list));
Lars Ellenberg2265b472010-12-16 15:41:26 +01003231
3232 drbd_set_defaults(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003233}
3234
3235
3236static void drbd_destroy_mempools(void)
3237{
3238 struct page *page;
3239
3240 while (drbd_pp_pool) {
3241 page = drbd_pp_pool;
3242 drbd_pp_pool = (struct page *)page_private(page);
3243 __free_page(page);
3244 drbd_pp_vacant--;
3245 }
3246
3247 /* D_ASSERT(atomic_read(&drbd_pp_vacant)==0); */
3248
3249 if (drbd_ee_mempool)
3250 mempool_destroy(drbd_ee_mempool);
3251 if (drbd_request_mempool)
3252 mempool_destroy(drbd_request_mempool);
3253 if (drbd_ee_cache)
3254 kmem_cache_destroy(drbd_ee_cache);
3255 if (drbd_request_cache)
3256 kmem_cache_destroy(drbd_request_cache);
3257 if (drbd_bm_ext_cache)
3258 kmem_cache_destroy(drbd_bm_ext_cache);
3259 if (drbd_al_ext_cache)
3260 kmem_cache_destroy(drbd_al_ext_cache);
3261
3262 drbd_ee_mempool = NULL;
3263 drbd_request_mempool = NULL;
3264 drbd_ee_cache = NULL;
3265 drbd_request_cache = NULL;
3266 drbd_bm_ext_cache = NULL;
3267 drbd_al_ext_cache = NULL;
3268
3269 return;
3270}
3271
3272static int drbd_create_mempools(void)
3273{
3274 struct page *page;
Lars Ellenberg1816a2b2010-11-11 15:19:07 +01003275 const int number = (DRBD_MAX_BIO_SIZE/PAGE_SIZE) * minor_count;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003276 int i;
3277
3278 /* prepare our caches and mempools */
3279 drbd_request_mempool = NULL;
3280 drbd_ee_cache = NULL;
3281 drbd_request_cache = NULL;
3282 drbd_bm_ext_cache = NULL;
3283 drbd_al_ext_cache = NULL;
3284 drbd_pp_pool = NULL;
3285
3286 /* caches */
3287 drbd_request_cache = kmem_cache_create(
3288 "drbd_req", sizeof(struct drbd_request), 0, 0, NULL);
3289 if (drbd_request_cache == NULL)
3290 goto Enomem;
3291
3292 drbd_ee_cache = kmem_cache_create(
3293 "drbd_ee", sizeof(struct drbd_epoch_entry), 0, 0, NULL);
3294 if (drbd_ee_cache == NULL)
3295 goto Enomem;
3296
3297 drbd_bm_ext_cache = kmem_cache_create(
3298 "drbd_bm", sizeof(struct bm_extent), 0, 0, NULL);
3299 if (drbd_bm_ext_cache == NULL)
3300 goto Enomem;
3301
3302 drbd_al_ext_cache = kmem_cache_create(
3303 "drbd_al", sizeof(struct lc_element), 0, 0, NULL);
3304 if (drbd_al_ext_cache == NULL)
3305 goto Enomem;
3306
3307 /* mempools */
3308 drbd_request_mempool = mempool_create(number,
3309 mempool_alloc_slab, mempool_free_slab, drbd_request_cache);
3310 if (drbd_request_mempool == NULL)
3311 goto Enomem;
3312
3313 drbd_ee_mempool = mempool_create(number,
3314 mempool_alloc_slab, mempool_free_slab, drbd_ee_cache);
Nicolas Kaiser2027ae12010-10-28 06:15:26 -06003315 if (drbd_ee_mempool == NULL)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003316 goto Enomem;
3317
3318 /* drbd's page pool */
3319 spin_lock_init(&drbd_pp_lock);
3320
3321 for (i = 0; i < number; i++) {
3322 page = alloc_page(GFP_HIGHUSER);
3323 if (!page)
3324 goto Enomem;
3325 set_page_private(page, (unsigned long)drbd_pp_pool);
3326 drbd_pp_pool = page;
3327 }
3328 drbd_pp_vacant = number;
3329
3330 return 0;
3331
3332Enomem:
3333 drbd_destroy_mempools(); /* in case we allocated some */
3334 return -ENOMEM;
3335}
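/*
 * Illustrative arithmetic, not from the original source: with a 4 KiB
 * PAGE_SIZE the "number" computed above is DRBD_MAX_BIO_SIZE/4096 pages
 * per configured minor (for instance 256 per minor if DRBD_MAX_BIO_SIZE
 * were 1 MiB), so the page pool is pre-populated with enough pages for
 * one maximally sized bio per minor, and the same count is reused as the
 * minimum reserve of the request and ee mempools.
 */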
3336
3337static int drbd_notify_sys(struct notifier_block *this, unsigned long code,
3338 void *unused)
3339{
3340 /* just so we have it. you never know what interesting things we
3341 * might want to do here some day...
3342 */
3343
3344 return NOTIFY_DONE;
3345}
3346
3347static struct notifier_block drbd_notifier = {
3348 .notifier_call = drbd_notify_sys,
3349};
3350
3351static void drbd_release_ee_lists(struct drbd_conf *mdev)
3352{
3353 int rr;
3354
3355 rr = drbd_release_ee(mdev, &mdev->active_ee);
3356 if (rr)
3357 dev_err(DEV, "%d EEs in active list found!\n", rr);
3358
3359 rr = drbd_release_ee(mdev, &mdev->sync_ee);
3360 if (rr)
3361 dev_err(DEV, "%d EEs in sync list found!\n", rr);
3362
3363 rr = drbd_release_ee(mdev, &mdev->read_ee);
3364 if (rr)
3365 dev_err(DEV, "%d EEs in read list found!\n", rr);
3366
3367 rr = drbd_release_ee(mdev, &mdev->done_ee);
3368 if (rr)
3369 dev_err(DEV, "%d EEs in done list found!\n", rr);
3370
3371 rr = drbd_release_ee(mdev, &mdev->net_ee);
3372 if (rr)
3373 dev_err(DEV, "%d EEs in net list found!\n", rr);
3374}
3375
3376/* caution. no locking.
3377 * currently only used from module cleanup code. */
3378static void drbd_delete_device(unsigned int minor)
3379{
3380 struct drbd_conf *mdev = minor_to_mdev(minor);
3381
3382 if (!mdev)
3383 return;
3384
Philipp Reisnerdfa8bed2011-06-29 14:06:08 +02003385 del_timer_sync(&mdev->request_timer);
3386
Philipp Reisnerb411b362009-09-25 16:07:19 -07003387 /* paranoia asserts */
3388 if (mdev->open_cnt != 0)
3389 dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
3390 __FILE__ , __LINE__);
3391
3392 ERR_IF (!list_empty(&mdev->data.work.q)) {
3393 struct list_head *lp;
3394 list_for_each(lp, &mdev->data.work.q) {
3395 dev_err(DEV, "lp = %p\n", lp);
3396 }
3397 };
3398 /* end paranoia asserts */
3399
3400 del_gendisk(mdev->vdisk);
3401
3402 /* cleanup stuff that may have been allocated during
3403 * device (re-)configuration or state changes */
3404
3405 if (mdev->this_bdev)
3406 bdput(mdev->this_bdev);
3407
3408 drbd_free_resources(mdev);
3409
3410 drbd_release_ee_lists(mdev);
3411
Bart Van Assche24c48302011-05-21 18:32:29 +02003412 /* should be freed on disconnect? */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003413 kfree(mdev->ee_hash);
3414 /*
3415 mdev->ee_hash_s = 0;
3416 mdev->ee_hash = NULL;
3417 */
3418
3419 lc_destroy(mdev->act_log);
3420 lc_destroy(mdev->resync);
3421
3422 kfree(mdev->p_uuid);
3423 /* mdev->p_uuid = NULL; */
3424
3425 kfree(mdev->int_dig_out);
3426 kfree(mdev->int_dig_in);
3427 kfree(mdev->int_dig_vv);
3428
3429 /* cleanup the rest that has been
3430 * allocated from drbd_new_device
3431 * and actually free the mdev itself */
3432 drbd_free_mdev(mdev);
3433}
3434
3435static void drbd_cleanup(void)
3436{
3437 unsigned int i;
3438
3439 unregister_reboot_notifier(&drbd_notifier);
3440
Lars Ellenberg17a93f302010-11-24 10:37:35 +01003441 /* first remove proc,
 3442	 * drbdsetup uses its presence to detect
 3443	 * whether DRBD is loaded.
 3444	 * If we were to get stuck in proc removal,
3445 * but have netlink already deregistered,
3446 * some drbdsetup commands may wait forever
3447 * for an answer.
3448 */
3449 if (drbd_proc)
3450 remove_proc_entry("drbd", NULL);
3451
Philipp Reisnerb411b362009-09-25 16:07:19 -07003452 drbd_nl_cleanup();
3453
3454 if (minor_table) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003455 i = minor_count;
3456 while (i--)
3457 drbd_delete_device(i);
3458 drbd_destroy_mempools();
3459 }
3460
3461 kfree(minor_table);
3462
3463 unregister_blkdev(DRBD_MAJOR, "drbd");
3464
3465 printk(KERN_INFO "drbd: module cleanup done.\n");
3466}
3467
3468/**
3469 * drbd_congested() - Callback for pdflush
3470 * @congested_data: User data
3471 * @bdi_bits: Bits pdflush is currently interested in
3472 *
3473 * Returns 1<<BDI_async_congested and/or 1<<BDI_sync_congested if we are congested.
3474 */
3475static int drbd_congested(void *congested_data, int bdi_bits)
3476{
3477 struct drbd_conf *mdev = congested_data;
3478 struct request_queue *q;
3479 char reason = '-';
3480 int r = 0;
3481
Andreas Gruenbacher1b881ef2010-12-13 18:03:38 +01003482 if (!may_inc_ap_bio(mdev)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003483 /* DRBD has frozen IO */
3484 r = bdi_bits;
3485 reason = 'd';
3486 goto out;
3487 }
3488
3489 if (get_ldev(mdev)) {
3490 q = bdev_get_queue(mdev->ldev->backing_bdev);
3491 r = bdi_congested(&q->backing_dev_info, bdi_bits);
3492 put_ldev(mdev);
3493 if (r)
3494 reason = 'b';
3495 }
3496
3497 if (bdi_bits & (1 << BDI_async_congested) && test_bit(NET_CONGESTED, &mdev->flags)) {
3498 r |= (1 << BDI_async_congested);
3499 reason = reason == 'b' ? 'a' : 'n';
3500 }
3501
3502out:
3503 mdev->congestion_reason = reason;
3504 return r;
3505}
3506
3507struct drbd_conf *drbd_new_device(unsigned int minor)
3508{
3509 struct drbd_conf *mdev;
3510 struct gendisk *disk;
3511 struct request_queue *q;
3512
3513 /* GFP_KERNEL, we are outside of all write-out paths */
3514 mdev = kzalloc(sizeof(struct drbd_conf), GFP_KERNEL);
3515 if (!mdev)
3516 return NULL;
3517 if (!zalloc_cpumask_var(&mdev->cpu_mask, GFP_KERNEL))
3518 goto out_no_cpumask;
3519
3520 mdev->minor = minor;
3521
3522 drbd_init_set_defaults(mdev);
3523
3524 q = blk_alloc_queue(GFP_KERNEL);
3525 if (!q)
3526 goto out_no_q;
3527 mdev->rq_queue = q;
3528 q->queuedata = mdev;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003529
3530 disk = alloc_disk(1);
3531 if (!disk)
3532 goto out_no_disk;
3533 mdev->vdisk = disk;
3534
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003535 set_disk_ro(disk, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003536
3537 disk->queue = q;
3538 disk->major = DRBD_MAJOR;
3539 disk->first_minor = minor;
3540 disk->fops = &drbd_ops;
3541 sprintf(disk->disk_name, "drbd%d", minor);
3542 disk->private_data = mdev;
3543
3544 mdev->this_bdev = bdget(MKDEV(DRBD_MAJOR, minor));
3545 /* we have no partitions. we contain only ourselves. */
3546 mdev->this_bdev->bd_contains = mdev->this_bdev;
3547
3548 q->backing_dev_info.congested_fn = drbd_congested;
3549 q->backing_dev_info.congested_data = mdev;
3550
Andreas Gruenbacher2f58dcf2010-12-13 17:48:19 +01003551 blk_queue_make_request(q, drbd_make_request);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003552	/* Setting max_hw_sectors to a deliberately odd value of 8 KiB here
 3553	 triggers a max_bio_size message upon first attach or connect */
3554 blk_queue_max_hw_sectors(q, DRBD_MAX_BIO_SIZE_SAFE >> 8);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003555 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
3556 blk_queue_merge_bvec(q, drbd_merge_bvec);
Jens Axboe7eaceac2011-03-10 08:52:07 +01003557 q->queue_lock = &mdev->req_lock;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003558
3559 mdev->md_io_page = alloc_page(GFP_KERNEL);
3560 if (!mdev->md_io_page)
3561 goto out_no_io_page;
3562
3563 if (drbd_bm_init(mdev))
3564 goto out_no_bitmap;
3565 /* no need to lock access, we are still initializing this minor device. */
3566 if (!tl_init(mdev))
3567 goto out_no_tl;
3568
3569 mdev->app_reads_hash = kzalloc(APP_R_HSIZE*sizeof(void *), GFP_KERNEL);
3570 if (!mdev->app_reads_hash)
3571 goto out_no_app_reads;
3572
3573 mdev->current_epoch = kzalloc(sizeof(struct drbd_epoch), GFP_KERNEL);
3574 if (!mdev->current_epoch)
3575 goto out_no_epoch;
3576
3577 INIT_LIST_HEAD(&mdev->current_epoch->list);
3578 mdev->epochs = 1;
3579
3580 return mdev;
3581
3582/* out_whatever_else:
3583 kfree(mdev->current_epoch); */
3584out_no_epoch:
3585 kfree(mdev->app_reads_hash);
3586out_no_app_reads:
3587 tl_cleanup(mdev);
3588out_no_tl:
3589 drbd_bm_cleanup(mdev);
3590out_no_bitmap:
3591 __free_page(mdev->md_io_page);
3592out_no_io_page:
3593 put_disk(disk);
3594out_no_disk:
3595 blk_cleanup_queue(q);
3596out_no_q:
3597 free_cpumask_var(mdev->cpu_mask);
3598out_no_cpumask:
3599 kfree(mdev);
3600 return NULL;
3601}
3602
3603/* counterpart of drbd_new_device.
3604 * last part of drbd_delete_device. */
3605void drbd_free_mdev(struct drbd_conf *mdev)
3606{
3607 kfree(mdev->current_epoch);
3608 kfree(mdev->app_reads_hash);
3609 tl_cleanup(mdev);
3610 if (mdev->bitmap) /* should no longer be there. */
3611 drbd_bm_cleanup(mdev);
3612 __free_page(mdev->md_io_page);
3613 put_disk(mdev->vdisk);
3614 blk_cleanup_queue(mdev->rq_queue);
3615 free_cpumask_var(mdev->cpu_mask);
Philipp Reisner37190942010-11-10 12:08:37 +01003616 drbd_free_tl_hash(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003617 kfree(mdev);
3618}
3619
3620
3621int __init drbd_init(void)
3622{
3623 int err;
3624
3625 if (sizeof(struct p_handshake) != 80) {
3626 printk(KERN_ERR
3627 "drbd: never change the size or layout "
3628 "of the HandShake packet.\n");
3629 return -EINVAL;
3630 }
3631
Philipp Reisner2b8a90b2011-01-10 11:15:17 +01003632 if (minor_count < DRBD_MINOR_COUNT_MIN || minor_count > DRBD_MINOR_COUNT_MAX) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003633 printk(KERN_ERR
3634 "drbd: invalid minor_count (%d)\n", minor_count);
3635#ifdef MODULE
3636 return -EINVAL;
3637#else
3638 minor_count = 8;
3639#endif
3640 }
3641
3642 err = drbd_nl_init();
3643 if (err)
3644 return err;
3645
3646 err = register_blkdev(DRBD_MAJOR, "drbd");
3647 if (err) {
3648 printk(KERN_ERR
3649 "drbd: unable to register block device major %d\n",
3650 DRBD_MAJOR);
3651 return err;
3652 }
3653
3654 register_reboot_notifier(&drbd_notifier);
3655
3656 /*
3657 * allocate all necessary structs
3658 */
3659 err = -ENOMEM;
3660
3661 init_waitqueue_head(&drbd_pp_wait);
3662
3663 drbd_proc = NULL; /* play safe for drbd_cleanup */
3664 minor_table = kzalloc(sizeof(struct drbd_conf *)*minor_count,
3665 GFP_KERNEL);
3666 if (!minor_table)
3667 goto Enomem;
3668
3669 err = drbd_create_mempools();
3670 if (err)
3671 goto Enomem;
3672
Lars Ellenberg8c484ee2010-03-11 16:47:58 +01003673 drbd_proc = proc_create_data("drbd", S_IFREG | S_IRUGO , NULL, &drbd_proc_fops, NULL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003674 if (!drbd_proc) {
3675 printk(KERN_ERR "drbd: unable to register proc file\n");
3676 goto Enomem;
3677 }
3678
3679 rwlock_init(&global_state_lock);
3680
3681 printk(KERN_INFO "drbd: initialized. "
3682 "Version: " REL_VERSION " (api:%d/proto:%d-%d)\n",
3683 API_VERSION, PRO_VERSION_MIN, PRO_VERSION_MAX);
3684 printk(KERN_INFO "drbd: %s\n", drbd_buildtag());
3685 printk(KERN_INFO "drbd: registered as block device major %d\n",
3686 DRBD_MAJOR);
3687 printk(KERN_INFO "drbd: minor_table @ 0x%p\n", minor_table);
3688
3689 return 0; /* Success! */
3690
3691Enomem:
3692 drbd_cleanup();
3693 if (err == -ENOMEM)
3694 /* currently always the case */
3695 printk(KERN_ERR "drbd: ran out of memory\n");
3696 else
3697 printk(KERN_ERR "drbd: initialization failure\n");
3698 return err;
3699}
3700
3701void drbd_free_bc(struct drbd_backing_dev *ldev)
3702{
3703 if (ldev == NULL)
3704 return;
3705
Tejun Heoe525fd82010-11-13 11:55:17 +01003706 blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
3707 blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003708
3709 kfree(ldev);
3710}
3711
3712void drbd_free_sock(struct drbd_conf *mdev)
3713{
3714 if (mdev->data.socket) {
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003715 mutex_lock(&mdev->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003716 kernel_sock_shutdown(mdev->data.socket, SHUT_RDWR);
3717 sock_release(mdev->data.socket);
3718 mdev->data.socket = NULL;
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003719 mutex_unlock(&mdev->data.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003720 }
3721 if (mdev->meta.socket) {
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003722 mutex_lock(&mdev->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003723 kernel_sock_shutdown(mdev->meta.socket, SHUT_RDWR);
3724 sock_release(mdev->meta.socket);
3725 mdev->meta.socket = NULL;
Lars Ellenberg4589d7f2010-03-03 02:25:33 +01003726 mutex_unlock(&mdev->meta.mutex);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003727 }
3728}
3729
3730
3731void drbd_free_resources(struct drbd_conf *mdev)
3732{
3733 crypto_free_hash(mdev->csums_tfm);
3734 mdev->csums_tfm = NULL;
3735 crypto_free_hash(mdev->verify_tfm);
3736 mdev->verify_tfm = NULL;
3737 crypto_free_hash(mdev->cram_hmac_tfm);
3738 mdev->cram_hmac_tfm = NULL;
3739 crypto_free_hash(mdev->integrity_w_tfm);
3740 mdev->integrity_w_tfm = NULL;
3741 crypto_free_hash(mdev->integrity_r_tfm);
3742 mdev->integrity_r_tfm = NULL;
3743
3744 drbd_free_sock(mdev);
3745
3746 __no_warn(local,
3747 drbd_free_bc(mdev->ldev);
3748 mdev->ldev = NULL;);
3749}
3750
3751/* meta data management */
3752
3753struct meta_data_on_disk {
3754 u64 la_size; /* last agreed size. */
3755 u64 uuid[UI_SIZE]; /* UUIDs. */
3756 u64 device_uuid;
3757 u64 reserved_u64_1;
3758 u32 flags; /* MDF */
3759 u32 magic;
3760 u32 md_size_sect;
3761 u32 al_offset; /* offset to this block */
3762 u32 al_nr_extents; /* important for restoring the AL */
3763 /* `-- act_log->nr_elements <-- sync_conf.al_extents */
3764 u32 bm_offset; /* offset to the bitmap, from here */
3765 u32 bm_bytes_per_bit; /* BM_BLOCK_SIZE */
Philipp Reisner99432fc2011-05-20 16:39:13 +02003766 u32 la_peer_max_bio_size; /* last peer max_bio_size */
3767 u32 reserved_u32[3];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003768
3769} __packed;
3770
3771/**
3772 * drbd_md_sync() - Writes the meta data super block if the MD_DIRTY flag bit is set
3773 * @mdev: DRBD device.
3774 */
3775void drbd_md_sync(struct drbd_conf *mdev)
3776{
3777 struct meta_data_on_disk *buffer;
3778 sector_t sector;
3779 int i;
3780
Lars Ellenbergee15b032010-09-03 10:00:09 +02003781 del_timer(&mdev->md_sync_timer);
3782 /* timer may be rearmed by drbd_md_mark_dirty() now. */
Philipp Reisnerb411b362009-09-25 16:07:19 -07003783 if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
3784 return;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003785
3786 /* We use here D_FAILED and not D_ATTACHING because we try to write
3787 * metadata even if we detach due to a disk failure! */
3788 if (!get_ldev_if_state(mdev, D_FAILED))
3789 return;
3790
Philipp Reisnere1711732011-06-27 11:51:46 +02003791 buffer = drbd_md_get_buffer(mdev);
3792 if (!buffer)
3793 goto out;
3794
Philipp Reisnerb411b362009-09-25 16:07:19 -07003795 memset(buffer, 0, 512);
3796
3797 buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
3798 for (i = UI_CURRENT; i < UI_SIZE; i++)
3799 buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
3800 buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
3801 buffer->magic = cpu_to_be32(DRBD_MD_MAGIC);
3802
3803 buffer->md_size_sect = cpu_to_be32(mdev->ldev->md.md_size_sect);
3804 buffer->al_offset = cpu_to_be32(mdev->ldev->md.al_offset);
3805 buffer->al_nr_extents = cpu_to_be32(mdev->act_log->nr_elements);
3806 buffer->bm_bytes_per_bit = cpu_to_be32(BM_BLOCK_SIZE);
3807 buffer->device_uuid = cpu_to_be64(mdev->ldev->md.device_uuid);
3808
3809 buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
Philipp Reisner99432fc2011-05-20 16:39:13 +02003810 buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003811
3812 D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
3813 sector = mdev->ldev->md.md_offset;
3814
Lars Ellenberg3f3a9b82010-09-01 15:12:12 +02003815 if (!drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07003816 /* this was a try anyways ... */
3817 dev_err(DEV, "meta data update failed!\n");
Andreas Gruenbacher81e84652010-12-09 15:03:57 +01003818 drbd_chk_io_error(mdev, 1, true);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003819 }
3820
3821 /* Update mdev->ldev->md.la_size_sect,
3822 * since we updated it on metadata. */
3823 mdev->ldev->md.la_size_sect = drbd_get_capacity(mdev->this_bdev);
3824
Philipp Reisnere1711732011-06-27 11:51:46 +02003825 drbd_md_put_buffer(mdev);
3826out:
Philipp Reisnerb411b362009-09-25 16:07:19 -07003827 put_ldev(mdev);
3828}
3829
3830/**
3831 * drbd_md_read() - Reads in the meta data super block
3832 * @mdev: DRBD device.
3833 * @bdev: Device from which the meta data should be read in.
3834 *
Andreas Gruenbacher116676c2010-12-08 13:33:11 +01003835 * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
Philipp Reisnerb411b362009-09-25 16:07:19 -07003836 * something goes wrong. Currently only: ERR_IO_MD_DISK, ERR_MD_INVALID.
3837 */
3838int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
3839{
3840 struct meta_data_on_disk *buffer;
3841 int i, rv = NO_ERROR;
3842
3843 if (!get_ldev_if_state(mdev, D_ATTACHING))
3844 return ERR_IO_MD_DISK;
3845
Philipp Reisnere1711732011-06-27 11:51:46 +02003846 buffer = drbd_md_get_buffer(mdev);
3847 if (!buffer)
3848 goto out;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003849
3850 if (!drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
Lucas De Marchi25985ed2011-03-30 22:57:33 -03003851 /* NOTE: can't do normal error processing here as this is
Philipp Reisnerb411b362009-09-25 16:07:19 -07003852 called BEFORE disk is attached */
3853 dev_err(DEV, "Error while reading metadata.\n");
3854 rv = ERR_IO_MD_DISK;
3855 goto err;
3856 }
3857
3858 if (be32_to_cpu(buffer->magic) != DRBD_MD_MAGIC) {
3859 dev_err(DEV, "Error while reading metadata, magic not found.\n");
3860 rv = ERR_MD_INVALID;
3861 goto err;
3862 }
3863 if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
3864 dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
3865 be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
3866 rv = ERR_MD_INVALID;
3867 goto err;
3868 }
3869 if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
3870 dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
3871 be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
3872 rv = ERR_MD_INVALID;
3873 goto err;
3874 }
3875 if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
3876 dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
3877 be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
3878 rv = ERR_MD_INVALID;
3879 goto err;
3880 }
3881
3882 if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
3883 dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
3884 be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
3885 rv = ERR_MD_INVALID;
3886 goto err;
3887 }
3888
3889 bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
3890 for (i = UI_CURRENT; i < UI_SIZE; i++)
3891 bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
3892 bdev->md.flags = be32_to_cpu(buffer->flags);
3893 mdev->sync_conf.al_extents = be32_to_cpu(buffer->al_nr_extents);
3894 bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
3895
Philipp Reisner99432fc2011-05-20 16:39:13 +02003896 spin_lock_irq(&mdev->req_lock);
3897 if (mdev->state.conn < C_CONNECTED) {
3898 int peer;
3899 peer = be32_to_cpu(buffer->la_peer_max_bio_size);
3900 peer = max_t(int, peer, DRBD_MAX_BIO_SIZE_SAFE);
3901 mdev->peer_max_bio_size = peer;
3902 }
3903 spin_unlock_irq(&mdev->req_lock);
3904
Philipp Reisnerb411b362009-09-25 16:07:19 -07003905 if (mdev->sync_conf.al_extents < 7)
3906 mdev->sync_conf.al_extents = 127;
3907
3908 err:
Philipp Reisnere1711732011-06-27 11:51:46 +02003909 drbd_md_put_buffer(mdev);
3910 out:
Philipp Reisnerb411b362009-09-25 16:07:19 -07003911 put_ldev(mdev);
3912
3913 return rv;
3914}
3915
3916/**
3917 * drbd_md_mark_dirty() - Mark meta data super block as dirty
3918 * @mdev: DRBD device.
3919 *
3920 * Call this function if you change anything that should be written to
3921 * the meta-data super block. This function sets MD_DIRTY, and starts a
3922 * timer that ensures that within five seconds you have to call drbd_md_sync().
3923 */
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003924#ifdef DEBUG
Lars Ellenbergee15b032010-09-03 10:00:09 +02003925void drbd_md_mark_dirty_(struct drbd_conf *mdev, unsigned int line, const char *func)
3926{
3927 if (!test_and_set_bit(MD_DIRTY, &mdev->flags)) {
3928 mod_timer(&mdev->md_sync_timer, jiffies + HZ);
3929 mdev->last_md_mark_dirty.line = line;
3930 mdev->last_md_mark_dirty.func = func;
3931 }
3932}
3933#else
Philipp Reisnerb411b362009-09-25 16:07:19 -07003934void drbd_md_mark_dirty(struct drbd_conf *mdev)
3935{
Lars Ellenbergee15b032010-09-03 10:00:09 +02003936 if (!test_and_set_bit(MD_DIRTY, &mdev->flags))
Lars Ellenbergca0e6092010-10-14 15:01:21 +02003937 mod_timer(&mdev->md_sync_timer, jiffies + 5*HZ);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003938}
Lars Ellenbergee15b032010-09-03 10:00:09 +02003939#endif
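/*
 * Illustrative sketch only, not part of the driver: the typical dirty/sync
 * pattern documented above, as used for instance by _drbd_uuid_set() below.
 * Mutate an in-core md field, mark it dirty, and rely on either the caller
 * or the md_sync_timer to get it to disk:
 *
 *	mdev->ldev->md.uuid[idx] = val;
 *	drbd_md_mark_dirty(mdev);
 *	...
 *	drbd_md_sync(mdev);   (explicitly, or via the md_sync_timer path)
 */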
Philipp Reisnerb411b362009-09-25 16:07:19 -07003940
3941static void drbd_uuid_move_history(struct drbd_conf *mdev) __must_hold(local)
3942{
3943 int i;
3944
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003945 for (i = UI_HISTORY_START; i < UI_HISTORY_END; i++)
Philipp Reisnerb411b362009-09-25 16:07:19 -07003946 mdev->ldev->md.uuid[i+1] = mdev->ldev->md.uuid[i];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003947}
3948
3949void _drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3950{
3951 if (idx == UI_CURRENT) {
3952 if (mdev->state.role == R_PRIMARY)
3953 val |= 1;
3954 else
3955 val &= ~((u64)1);
3956
3957 drbd_set_ed_uuid(mdev, val);
3958 }
3959
3960 mdev->ldev->md.uuid[idx] = val;
Philipp Reisnerb411b362009-09-25 16:07:19 -07003961 drbd_md_mark_dirty(mdev);
3962}
3963
3964
3965void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local)
3966{
3967 if (mdev->ldev->md.uuid[idx]) {
3968 drbd_uuid_move_history(mdev);
3969 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[idx];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003970 }
3971 _drbd_uuid_set(mdev, idx, val);
3972}
3973
3974/**
3975 * drbd_uuid_new_current() - Creates a new current UUID
3976 * @mdev: DRBD device.
3977 *
3978 * Creates a new current UUID, and rotates the old current UUID into
3979 * the bitmap slot. Causes an incremental resync upon next connect.
3980 */
3981void drbd_uuid_new_current(struct drbd_conf *mdev) __must_hold(local)
3982{
3983 u64 val;
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003984 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003985
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003986 if (bm_uuid)
3987 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
3988
Philipp Reisnerb411b362009-09-25 16:07:19 -07003989 mdev->ldev->md.uuid[UI_BITMAP] = mdev->ldev->md.uuid[UI_CURRENT];
Philipp Reisnerb411b362009-09-25 16:07:19 -07003990
3991 get_random_bytes(&val, sizeof(u64));
3992 _drbd_uuid_set(mdev, UI_CURRENT, val);
Lars Ellenberg62b0da32011-01-20 13:25:21 +01003993 drbd_print_uuids(mdev, "new current UUID");
Lars Ellenbergaaa8e2b2010-10-15 13:16:53 +02003994 /* get it to stable storage _now_ */
3995 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07003996}
3997
3998void drbd_uuid_set_bm(struct drbd_conf *mdev, u64 val) __must_hold(local)
3999{
4000 if (mdev->ldev->md.uuid[UI_BITMAP] == 0 && val == 0)
4001 return;
4002
4003 if (val == 0) {
4004 drbd_uuid_move_history(mdev);
4005 mdev->ldev->md.uuid[UI_HISTORY_START] = mdev->ldev->md.uuid[UI_BITMAP];
4006 mdev->ldev->md.uuid[UI_BITMAP] = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004007 } else {
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004008 unsigned long long bm_uuid = mdev->ldev->md.uuid[UI_BITMAP];
4009 if (bm_uuid)
4010 dev_warn(DEV, "bm UUID was already set: %llX\n", bm_uuid);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004011
Lars Ellenberg62b0da32011-01-20 13:25:21 +01004012 mdev->ldev->md.uuid[UI_BITMAP] = val & ~((u64)1);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004013 }
4014 drbd_md_mark_dirty(mdev);
4015}
4016
4017/**
4018 * drbd_bmio_set_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
4019 * @mdev: DRBD device.
4020 *
4021 * Sets all bits in the bitmap and writes the whole bitmap to stable storage.
4022 */
4023int drbd_bmio_set_n_write(struct drbd_conf *mdev)
4024{
4025 int rv = -EIO;
4026
4027 if (get_ldev_if_state(mdev, D_ATTACHING)) {
4028 drbd_md_set_flag(mdev, MDF_FULL_SYNC);
4029 drbd_md_sync(mdev);
4030 drbd_bm_set_all(mdev);
4031
4032 rv = drbd_bm_write(mdev);
4033
4034 if (!rv) {
4035 drbd_md_clear_flag(mdev, MDF_FULL_SYNC);
4036 drbd_md_sync(mdev);
4037 }
4038
4039 put_ldev(mdev);
4040 }
4041
4042 return rv;
4043}
4044
4045/**
4046 * drbd_bmio_clear_n_write() - io_fn for drbd_queue_bitmap_io() or drbd_bitmap_io()
4047 * @mdev: DRBD device.
4048 *
4049 * Clears all bits in the bitmap and writes the whole bitmap to stable storage.
4050 */
4051int drbd_bmio_clear_n_write(struct drbd_conf *mdev)
4052{
4053 int rv = -EIO;
4054
Philipp Reisner07782862010-08-31 12:00:50 +02004055 drbd_resume_al(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004056 if (get_ldev_if_state(mdev, D_ATTACHING)) {
4057 drbd_bm_clear_all(mdev);
4058 rv = drbd_bm_write(mdev);
4059 put_ldev(mdev);
4060 }
4061
4062 return rv;
4063}
4064
4065static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4066{
4067 struct bm_io_work *work = container_of(w, struct bm_io_work, w);
Lars Ellenberg02851e92010-12-16 14:47:39 +01004068 int rv = -EIO;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004069
4070 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
4071
Lars Ellenberg02851e92010-12-16 14:47:39 +01004072 if (get_ldev(mdev)) {
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004073 drbd_bm_lock(mdev, work->why, work->flags);
Lars Ellenberg02851e92010-12-16 14:47:39 +01004074 rv = work->io_fn(mdev);
4075 drbd_bm_unlock(mdev);
4076 put_ldev(mdev);
4077 }
Philipp Reisnerb411b362009-09-25 16:07:19 -07004078
4079 clear_bit(BITMAP_IO, &mdev->flags);
Philipp Reisner127b3172010-11-16 10:07:53 +01004080 smp_mb__after_clear_bit();
Philipp Reisnerb411b362009-09-25 16:07:19 -07004081 wake_up(&mdev->misc_wait);
4082
4083 if (work->done)
4084 work->done(mdev, rv);
4085
4086 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
4087 work->why = NULL;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004088 work->flags = 0;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004089
4090 return 1;
4091}
4092
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02004093void drbd_ldev_destroy(struct drbd_conf *mdev)
4094{
4095 lc_destroy(mdev->resync);
4096 mdev->resync = NULL;
4097 lc_destroy(mdev->act_log);
4098 mdev->act_log = NULL;
4099 __no_warn(local,
4100 drbd_free_bc(mdev->ldev);
4101 mdev->ldev = NULL;);
4102
4103 if (mdev->md_io_tmpp) {
4104 __free_page(mdev->md_io_tmpp);
4105 mdev->md_io_tmpp = NULL;
4106 }
4107 clear_bit(GO_DISKLESS, &mdev->flags);
4108}
4109
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004110static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4111{
4112 D_ASSERT(mdev->state.disk == D_FAILED);
Lars Ellenberg9d282872010-10-14 13:57:07 +02004113 /* we cannot assert local_cnt == 0 here, as get_ldev_if_state will
4114 * inc/dec it frequently. Once we are D_DISKLESS, no one will touch
Lars Ellenberg82f59cc2010-10-16 12:13:47 +02004115 * the protected members anymore, though, so once put_ldev reaches zero
4116 * again, it will be safe to free them. */
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004117 drbd_force_state(mdev, NS(disk, D_DISKLESS));
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004118 return 1;
4119}
4120
4121void drbd_go_diskless(struct drbd_conf *mdev)
4122{
4123 D_ASSERT(mdev->state.disk == D_FAILED);
4124 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
Lars Ellenberg9d282872010-10-14 13:57:07 +02004125 drbd_queue_work(&mdev->data.work, &mdev->go_diskless);
Lars Ellenberge9e6f3e2010-09-14 20:26:27 +02004126}
4127
Philipp Reisnerb411b362009-09-25 16:07:19 -07004128/**
4129 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
4130 * @mdev: DRBD device.
4131 * @io_fn: IO callback to be called when bitmap IO is possible
4132 * @done: callback to be called after the bitmap IO was performed
4133 * @why: Descriptive text of the reason for doing the IO
4134 *
 4135	 * While IO on the bitmap happens we freeze application IO, thus ensuring
 4136	 * that drbd_set_out_of_sync() cannot be called. This function MAY ONLY be
4137 * called from worker context. It MUST NOT be used while a previous such
4138 * work is still pending!
4139 */
4140void drbd_queue_bitmap_io(struct drbd_conf *mdev,
4141 int (*io_fn)(struct drbd_conf *),
4142 void (*done)(struct drbd_conf *, int),
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004143 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004144{
4145 D_ASSERT(current == mdev->worker.task);
4146
4147 D_ASSERT(!test_bit(BITMAP_IO_QUEUED, &mdev->flags));
4148 D_ASSERT(!test_bit(BITMAP_IO, &mdev->flags));
4149 D_ASSERT(list_empty(&mdev->bm_io_work.w.list));
4150 if (mdev->bm_io_work.why)
4151 dev_err(DEV, "FIXME going to queue '%s' but '%s' still pending?\n",
4152 why, mdev->bm_io_work.why);
4153
4154 mdev->bm_io_work.io_fn = io_fn;
4155 mdev->bm_io_work.done = done;
4156 mdev->bm_io_work.why = why;
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004157 mdev->bm_io_work.flags = flags;
Philipp Reisnerb411b362009-09-25 16:07:19 -07004158
Philipp Reisner22afd7e2010-11-16 15:30:44 +01004159 spin_lock_irq(&mdev->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004160 set_bit(BITMAP_IO, &mdev->flags);
4161 if (atomic_read(&mdev->ap_bio_cnt) == 0) {
Philipp Reisner127b3172010-11-16 10:07:53 +01004162 if (!test_and_set_bit(BITMAP_IO_QUEUED, &mdev->flags))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004163 drbd_queue_work(&mdev->data.work, &mdev->bm_io_work.w);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004164 }
Philipp Reisner22afd7e2010-11-16 15:30:44 +01004165 spin_unlock_irq(&mdev->req_lock);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004166}
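/*
 * Illustrative sketch only, not code that exists elsewhere in this file:
 * queueing a full "set all bits and write out" pass from worker context,
 * with no completion callback.  The reason string and the flags value are
 * example choices, not requirements:
 *
 *	drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
 *			     "example: set_n_write", BM_LOCKED_SET_ALLOWED);
 */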
4167
4168/**
4169 * drbd_bitmap_io() - Does an IO operation on the whole bitmap
4170 * @mdev: DRBD device.
4171 * @io_fn: IO callback to be called when bitmap IO is possible
4172 * @why: Descriptive text of the reason for doing the IO
4173 *
 4174	 * Freezes application IO while the actual IO operation runs. This
 4175	 * function MAY NOT be called from worker context.
4176 */
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004177int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
4178 char *why, enum bm_flag flags)
Philipp Reisnerb411b362009-09-25 16:07:19 -07004179{
4180 int rv;
4181
4182 D_ASSERT(current != mdev->worker.task);
4183
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004184 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4185 drbd_suspend_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004186
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004187 drbd_bm_lock(mdev, why, flags);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004188 rv = io_fn(mdev);
4189 drbd_bm_unlock(mdev);
4190
Lars Ellenberg20ceb2b2011-01-21 10:56:44 +01004191 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4192 drbd_resume_io(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004193
4194 return rv;
4195}
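/*
 * Illustrative counterpart, again not from the original source: the
 * synchronous variant above must NOT run in worker context, e.g. clearing
 * the whole bitmap and writing it out from a configuration path.  The
 * reason string and flags value are example choices:
 *
 *	rv = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
 *			    "example: clear_n_write", BM_LOCKED_SET_ALLOWED);
 */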
4196
4197void drbd_md_set_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
4198{
4199 if ((mdev->ldev->md.flags & flag) != flag) {
4200 drbd_md_mark_dirty(mdev);
4201 mdev->ldev->md.flags |= flag;
4202 }
4203}
4204
4205void drbd_md_clear_flag(struct drbd_conf *mdev, int flag) __must_hold(local)
4206{
4207 if ((mdev->ldev->md.flags & flag) != 0) {
4208 drbd_md_mark_dirty(mdev);
4209 mdev->ldev->md.flags &= ~flag;
4210 }
4211}
4212int drbd_md_test_flag(struct drbd_backing_dev *bdev, int flag)
4213{
4214 return (bdev->md.flags & flag) != 0;
4215}
4216
4217static void md_sync_timer_fn(unsigned long data)
4218{
4219 struct drbd_conf *mdev = (struct drbd_conf *) data;
4220
4221 drbd_queue_work_front(&mdev->data.work, &mdev->md_sync_work);
4222}
4223
4224static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused)
4225{
4226 dev_warn(DEV, "md_sync_timer expired! Worker calls drbd_md_sync().\n");
Lars Ellenbergee15b032010-09-03 10:00:09 +02004227#ifdef DEBUG
4228 dev_warn(DEV, "last md_mark_dirty: %s:%u\n",
4229 mdev->last_md_mark_dirty.func, mdev->last_md_mark_dirty.line);
4230#endif
Philipp Reisnerb411b362009-09-25 16:07:19 -07004231 drbd_md_sync(mdev);
Philipp Reisnerb411b362009-09-25 16:07:19 -07004232 return 1;
4233}
4234
4235#ifdef CONFIG_DRBD_FAULT_INJECTION
4236/* Fault insertion support including random number generator shamelessly
4237 * stolen from kernel/rcutorture.c */
4238struct fault_random_state {
4239 unsigned long state;
4240 unsigned long count;
4241};
4242
4243#define FAULT_RANDOM_MULT 39916801 /* prime */
4244#define FAULT_RANDOM_ADD 479001701 /* prime */
4245#define FAULT_RANDOM_REFRESH 10000
4246
4247/*
4248 * Crude but fast random-number generator. Uses a linear congruential
4249 * generator, with occasional help from get_random_bytes().
4250 */
4251static unsigned long
4252_drbd_fault_random(struct fault_random_state *rsp)
4253{
4254 long refresh;
4255
Roel Kluin49829ea2009-12-15 22:55:44 +01004256 if (!rsp->count--) {
Philipp Reisnerb411b362009-09-25 16:07:19 -07004257 get_random_bytes(&refresh, sizeof(refresh));
4258 rsp->state += refresh;
4259 rsp->count = FAULT_RANDOM_REFRESH;
4260 }
4261 rsp->state = rsp->state * FAULT_RANDOM_MULT + FAULT_RANDOM_ADD;
4262 return swahw32(rsp->state);
4263}
4264
4265static char *
4266_drbd_fault_str(unsigned int type) {
4267 static char *_faults[] = {
4268 [DRBD_FAULT_MD_WR] = "Meta-data write",
4269 [DRBD_FAULT_MD_RD] = "Meta-data read",
4270 [DRBD_FAULT_RS_WR] = "Resync write",
4271 [DRBD_FAULT_RS_RD] = "Resync read",
4272 [DRBD_FAULT_DT_WR] = "Data write",
4273 [DRBD_FAULT_DT_RD] = "Data read",
4274 [DRBD_FAULT_DT_RA] = "Data read ahead",
4275 [DRBD_FAULT_BM_ALLOC] = "BM allocation",
Philipp Reisner6b4388a2010-04-26 14:11:45 +02004276 [DRBD_FAULT_AL_EE] = "EE allocation",
4277 [DRBD_FAULT_RECEIVE] = "receive data corruption",
Philipp Reisnerb411b362009-09-25 16:07:19 -07004278 };
4279
4280 return (type < DRBD_FAULT_MAX) ? _faults[type] : "**Unknown**";
4281}
4282
4283unsigned int
4284_drbd_insert_fault(struct drbd_conf *mdev, unsigned int type)
4285{
4286 static struct fault_random_state rrs = {0, 0};
4287
4288 unsigned int ret = (
4289 (fault_devs == 0 ||
4290 ((1 << mdev_to_minor(mdev)) & fault_devs) != 0) &&
4291 (((_drbd_fault_random(&rrs) % 100) + 1) <= fault_rate));
4292
4293 if (ret) {
4294 fault_count++;
4295
Lars Ellenberg73835062010-05-27 11:51:56 +02004296 if (__ratelimit(&drbd_ratelimit_state))
Philipp Reisnerb411b362009-09-25 16:07:19 -07004297 dev_warn(DEV, "***Simulating %s failure\n",
4298 _drbd_fault_str(type));
4299 }
4300
4301 return ret;
4302}
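/*
 * Illustrative reading of the check above, not documentation from the
 * original source: fault_rate acts as a percentage (a fault fires when
 * (random % 100) + 1 <= fault_rate) and fault_devs is either 0, meaning
 * "all devices", or a bitmask of minor numbers.  Assuming those knobs are
 * exposed as module parameters, e.g. fault_rate=10 fault_devs=0x1 would
 * inject roughly 10% simulated failures on minor 0 only.
 */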
4303#endif
4304
4305const char *drbd_buildtag(void)
4306{
 4307	/* DRBD built from external sources carries a reference to the
 4308	 git hash of the source code here. */
4309
4310 static char buildtag[38] = "\0uilt-in";
4311
4312 if (buildtag[0] == 0) {
4313#ifdef CONFIG_MODULES
4314 if (THIS_MODULE != NULL)
4315 sprintf(buildtag, "srcversion: %-24s", THIS_MODULE->srcversion);
4316 else
4317#endif
4318 buildtag[0] = 'b';
4319 }
4320
4321 return buildtag;
4322}
4323
4324module_init(drbd_init)
4325module_exit(drbd_cleanup)
4326
Philipp Reisnerb411b362009-09-25 16:07:19 -07004327EXPORT_SYMBOL(drbd_conn_str);
4328EXPORT_SYMBOL(drbd_role_str);
4329EXPORT_SYMBOL(drbd_disk_str);
4330EXPORT_SYMBOL(drbd_set_st_err_str);