blob: 26133f05ae3a7690911815358ad1f350343564b6 [file] [log] [blame]
David Teiglande7fd4172006-01-18 09:30:29 +00001/******************************************************************************
2*******************************************************************************
3**
David Teigland46b43ee2008-01-08 16:24:00 -06004** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
David Teiglande7fd4172006-01-18 09:30:29 +00005**
6** This copyrighted material is made available to anyone wishing to use,
7** modify, copy, or redistribute it subject to the terms and conditions
8** of the GNU General Public License v.2.
9**
10*******************************************************************************
11******************************************************************************/
12
13#include "dlm_internal.h"
14#include "lockspace.h"
15#include "member.h"
16#include "recoverd.h"
17#include "recover.h"
David Teiglande7fd4172006-01-18 09:30:29 +000018#include "rcom.h"
19#include "config.h"
20
David Teiglande7fd4172006-01-18 09:30:29 +000021static void add_ordered_member(struct dlm_ls *ls, struct dlm_member *new)
22{
23 struct dlm_member *memb = NULL;
24 struct list_head *tmp;
25 struct list_head *newlist = &new->list;
26 struct list_head *head = &ls->ls_nodes;
27
28 list_for_each(tmp, head) {
29 memb = list_entry(tmp, struct dlm_member, list);
30 if (new->nodeid < memb->nodeid)
31 break;
32 }
33
34 if (!memb)
35 list_add_tail(newlist, head);
36 else {
37 /* FIXME: can use list macro here */
38 newlist->prev = tmp->prev;
39 newlist->next = tmp;
40 tmp->prev->next = newlist;
41 tmp->prev = newlist;
42 }
43}
44
45static int dlm_add_member(struct dlm_ls *ls, int nodeid)
46{
47 struct dlm_member *memb;
48 int w;
49
David Teigland90135922006-01-20 08:47:07 +000050 memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
David Teiglande7fd4172006-01-18 09:30:29 +000051 if (!memb)
52 return -ENOMEM;
53
54 w = dlm_node_weight(ls->ls_name, nodeid);
Jesper Juhl1a2bf2e2007-07-19 00:27:43 +020055 if (w < 0) {
56 kfree(memb);
David Teiglande7fd4172006-01-18 09:30:29 +000057 return w;
Jesper Juhl1a2bf2e2007-07-19 00:27:43 +020058 }
David Teiglande7fd4172006-01-18 09:30:29 +000059
60 memb->nodeid = nodeid;
61 memb->weight = w;
62 add_ordered_member(ls, memb);
63 ls->ls_num_nodes++;
64 return 0;
65}
66
67static void dlm_remove_member(struct dlm_ls *ls, struct dlm_member *memb)
68{
69 list_move(&memb->list, &ls->ls_nodes_gone);
70 ls->ls_num_nodes--;
71}
72
David Teigland46b43ee2008-01-08 16:24:00 -060073int dlm_is_member(struct dlm_ls *ls, int nodeid)
David Teiglande7fd4172006-01-18 09:30:29 +000074{
75 struct dlm_member *memb;
76
77 list_for_each_entry(memb, &ls->ls_nodes, list) {
78 if (memb->nodeid == nodeid)
David Teigland90135922006-01-20 08:47:07 +000079 return 1;
David Teiglande7fd4172006-01-18 09:30:29 +000080 }
David Teigland90135922006-01-20 08:47:07 +000081 return 0;
David Teiglande7fd4172006-01-18 09:30:29 +000082}
83
84int dlm_is_removed(struct dlm_ls *ls, int nodeid)
85{
86 struct dlm_member *memb;
87
88 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
89 if (memb->nodeid == nodeid)
David Teigland90135922006-01-20 08:47:07 +000090 return 1;
David Teiglande7fd4172006-01-18 09:30:29 +000091 }
David Teigland90135922006-01-20 08:47:07 +000092 return 0;
David Teiglande7fd4172006-01-18 09:30:29 +000093}
94
95static void clear_memb_list(struct list_head *head)
96{
97 struct dlm_member *memb;
98
99 while (!list_empty(head)) {
100 memb = list_entry(head->next, struct dlm_member, list);
101 list_del(&memb->list);
102 kfree(memb);
103 }
104}
105
106void dlm_clear_members(struct dlm_ls *ls)
107{
108 clear_memb_list(&ls->ls_nodes);
109 ls->ls_num_nodes = 0;
110}
111
112void dlm_clear_members_gone(struct dlm_ls *ls)
113{
114 clear_memb_list(&ls->ls_nodes_gone);
115}
116
117static void make_member_array(struct dlm_ls *ls)
118{
119 struct dlm_member *memb;
120 int i, w, x = 0, total = 0, all_zero = 0, *array;
121
122 kfree(ls->ls_node_array);
123 ls->ls_node_array = NULL;
124
125 list_for_each_entry(memb, &ls->ls_nodes, list) {
126 if (memb->weight)
127 total += memb->weight;
128 }
129
130 /* all nodes revert to weight of 1 if all have weight 0 */
131
132 if (!total) {
133 total = ls->ls_num_nodes;
134 all_zero = 1;
135 }
136
137 ls->ls_total_weight = total;
138
139 array = kmalloc(sizeof(int) * total, GFP_KERNEL);
140 if (!array)
141 return;
142
143 list_for_each_entry(memb, &ls->ls_nodes, list) {
144 if (!all_zero && !memb->weight)
145 continue;
146
147 if (all_zero)
148 w = 1;
149 else
150 w = memb->weight;
151
152 DLM_ASSERT(x < total, printk("total %d x %d\n", total, x););
153
154 for (i = 0; i < w; i++)
155 array[x++] = memb->nodeid;
156 }
157
158 ls->ls_node_array = array;
159}
160
161/* send a status request to all members just to establish comms connections */
162
David Teiglandf6db1b82006-08-08 17:06:07 -0500163static int ping_members(struct dlm_ls *ls)
David Teiglande7fd4172006-01-18 09:30:29 +0000164{
165 struct dlm_member *memb;
David Teiglandf6db1b82006-08-08 17:06:07 -0500166 int error = 0;
167
168 list_for_each_entry(memb, &ls->ls_nodes, list) {
169 error = dlm_recovery_stopped(ls);
170 if (error)
171 break;
172 error = dlm_rcom_status(ls, memb->nodeid);
173 if (error)
174 break;
175 }
176 if (error)
David Teiglandfaa0f262006-08-08 17:08:42 -0500177 log_debug(ls, "ping_members aborted %d last nodeid %d",
178 error, ls->ls_recover_nodeid);
David Teiglandf6db1b82006-08-08 17:06:07 -0500179 return error;
David Teiglande7fd4172006-01-18 09:30:29 +0000180}
181
182int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
183{
184 struct dlm_member *memb, *safe;
185 int i, error, found, pos = 0, neg = 0, low = -1;
186
David Teigland91c0dc92006-10-31 11:56:01 -0600187 /* previously removed members that we've not finished removing need to
188 count as a negative change so the "neg" recovery steps will happen */
189
190 list_for_each_entry(memb, &ls->ls_nodes_gone, list) {
191 log_debug(ls, "prev removed member %d", memb->nodeid);
192 neg++;
193 }
194
David Teiglande7fd4172006-01-18 09:30:29 +0000195 /* move departed members from ls_nodes to ls_nodes_gone */
196
197 list_for_each_entry_safe(memb, safe, &ls->ls_nodes, list) {
David Teigland90135922006-01-20 08:47:07 +0000198 found = 0;
David Teiglande7fd4172006-01-18 09:30:29 +0000199 for (i = 0; i < rv->node_count; i++) {
200 if (memb->nodeid == rv->nodeids[i]) {
David Teigland90135922006-01-20 08:47:07 +0000201 found = 1;
David Teiglande7fd4172006-01-18 09:30:29 +0000202 break;
203 }
204 }
205
206 if (!found) {
207 neg++;
208 dlm_remove_member(ls, memb);
209 log_debug(ls, "remove member %d", memb->nodeid);
210 }
211 }
212
David Teiglandd44e0fc2008-03-18 14:22:11 -0500213 /* Add an entry to ls_nodes_gone for members that were removed and
214 then added again, so that previous state for these nodes will be
215 cleared during recovery. */
216
217 for (i = 0; i < rv->new_count; i++) {
218 if (!dlm_is_member(ls, rv->new[i]))
219 continue;
220 log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
221
222 memb = kzalloc(sizeof(struct dlm_member), GFP_KERNEL);
223 if (!memb)
224 return -ENOMEM;
225 memb->nodeid = rv->new[i];
226 list_add_tail(&memb->list, &ls->ls_nodes_gone);
227 neg++;
228 }
229
David Teiglande7fd4172006-01-18 09:30:29 +0000230 /* add new members to ls_nodes */
231
232 for (i = 0; i < rv->node_count; i++) {
233 if (dlm_is_member(ls, rv->nodeids[i]))
234 continue;
235 dlm_add_member(ls, rv->nodeids[i]);
236 pos++;
237 log_debug(ls, "add member %d", rv->nodeids[i]);
238 }
239
240 list_for_each_entry(memb, &ls->ls_nodes, list) {
241 if (low == -1 || memb->nodeid < low)
242 low = memb->nodeid;
243 }
244 ls->ls_low_nodeid = low;
245
246 make_member_array(ls);
247 dlm_set_recover_status(ls, DLM_RS_NODES);
248 *neg_out = neg;
249
David Teiglandf6db1b82006-08-08 17:06:07 -0500250 error = ping_members(ls);
David Teigland8b0e7b22007-05-18 09:03:35 -0500251 if (!error || error == -EPROTO) {
252 /* new_lockspace() may be waiting to know if the config
253 is good or bad */
254 ls->ls_members_result = error;
255 complete(&ls->ls_members_done);
256 }
David Teiglandf6db1b82006-08-08 17:06:07 -0500257 if (error)
258 goto out;
David Teiglande7fd4172006-01-18 09:30:29 +0000259
260 error = dlm_recover_members_wait(ls);
David Teiglandf6db1b82006-08-08 17:06:07 -0500261 out:
262 log_debug(ls, "total members %d error %d", ls->ls_num_nodes, error);
David Teiglande7fd4172006-01-18 09:30:29 +0000263 return error;
264}
265
/* Userspace guarantees that dlm_ls_stop() has completed on all nodes before
   dlm_ls_start() is called on any of them to start the new recovery. */

/*
 * Stop the lockspace: flag any in-progress recovery to abort, clear the
 * RUNNING flag, bump the recovery sequence number and reset the recovery
 * status.  If the lockspace was running when called, ls_in_recovery is taken
 * for writing and is still held when this function returns.
 *
 * Always returns 0.
 */
int dlm_ls_stop(struct dlm_ls *ls)
{
	int new;

	/*
	 * Prevent dlm_recv from being in the middle of something when we do
	 * the stop.  This includes ensuring dlm_recv isn't processing a
	 * recovery message (rcom), while dlm_recoverd is aborting and
	 * resetting things from an in-progress recovery.  i.e. we want
	 * dlm_recoverd to abort its recovery without worrying about dlm_recv
	 * processing an rcom at the same time.  Stopping dlm_recv also makes
	 * it easy for dlm_receive_message() to check locking stopped and add a
	 * message to the requestqueue without races.
	 */

	down_write(&ls->ls_recv_active);

	/*
	 * Abort any recovery that's in progress (see RECOVERY_STOP,
	 * dlm_recovery_stopped()) and tell any other threads running in the
	 * dlm to quit any processing (see RUNNING, dlm_locking_stopped()).
	 */

	spin_lock(&ls->ls_recover_lock);
	set_bit(LSFL_RECOVERY_STOP, &ls->ls_flags);
	/* "new" is non-zero only if RUNNING was set, i.e. this call is the
	   one that actually stopped a running lockspace */
	new = test_and_clear_bit(LSFL_RUNNING, &ls->ls_flags);
	ls->ls_recover_seq++;
	spin_unlock(&ls->ls_recover_lock);

	/*
	 * Let dlm_recv run again, now any normal messages will be saved on the
	 * requestqueue for later.
	 */

	up_write(&ls->ls_recv_active);

	/*
	 * This in_recovery lock does two things:
	 * 1) Keeps this function from returning until all threads are out
	 *    of locking routines and locking is truly stopped.
	 * 2) Keeps any new requests from being processed until it's unlocked
	 *    when recovery is complete.
	 */

	if (new)
		down_write(&ls->ls_in_recovery);

	/*
	 * The recoverd suspend/resume makes sure that dlm_recoverd (if
	 * running) has noticed RECOVERY_STOP above and quit processing the
	 * previous recovery.
	 */

	dlm_recoverd_suspend(ls);
	ls->ls_recover_status = 0;
	dlm_recoverd_resume(ls);

	/* record the start time only if one is not already recorded */
	if (!ls->ls_recover_begin)
		ls->ls_recover_begin = jiffies;
	return 0;
}
330
331int dlm_ls_start(struct dlm_ls *ls)
332{
333 struct dlm_recover *rv = NULL, *rv_old;
David Teiglandd44e0fc2008-03-18 14:22:11 -0500334 int *ids = NULL, *new = NULL;
335 int error, ids_count = 0, new_count = 0;
David Teiglande7fd4172006-01-18 09:30:29 +0000336
David Teigland90135922006-01-20 08:47:07 +0000337 rv = kzalloc(sizeof(struct dlm_recover), GFP_KERNEL);
David Teiglande7fd4172006-01-18 09:30:29 +0000338 if (!rv)
339 return -ENOMEM;
David Teiglande7fd4172006-01-18 09:30:29 +0000340
David Teiglandd44e0fc2008-03-18 14:22:11 -0500341 error = dlm_nodeid_list(ls->ls_name, &ids, &ids_count,
342 &new, &new_count);
343 if (error < 0)
David Teiglande7fd4172006-01-18 09:30:29 +0000344 goto fail;
345
346 spin_lock(&ls->ls_recover_lock);
347
348 /* the lockspace needs to be stopped before it can be started */
349
350 if (!dlm_locking_stopped(ls)) {
351 spin_unlock(&ls->ls_recover_lock);
352 log_error(ls, "start ignored: lockspace running");
353 error = -EINVAL;
354 goto fail;
355 }
356
357 rv->nodeids = ids;
David Teiglandd44e0fc2008-03-18 14:22:11 -0500358 rv->node_count = ids_count;
359 rv->new = new;
360 rv->new_count = new_count;
David Teiglande7fd4172006-01-18 09:30:29 +0000361 rv->seq = ++ls->ls_recover_seq;
362 rv_old = ls->ls_recover_args;
363 ls->ls_recover_args = rv;
364 spin_unlock(&ls->ls_recover_lock);
365
366 if (rv_old) {
David Teiglandd44e0fc2008-03-18 14:22:11 -0500367 log_error(ls, "unused recovery %llx %d",
368 (unsigned long long)rv_old->seq, rv_old->node_count);
David Teiglande7fd4172006-01-18 09:30:29 +0000369 kfree(rv_old->nodeids);
David Teiglandd44e0fc2008-03-18 14:22:11 -0500370 kfree(rv_old->new);
David Teiglande7fd4172006-01-18 09:30:29 +0000371 kfree(rv_old);
372 }
373
374 dlm_recoverd_kick(ls);
375 return 0;
376
377 fail:
378 kfree(rv);
379 kfree(ids);
David Teiglandd44e0fc2008-03-18 14:22:11 -0500380 kfree(new);
David Teiglande7fd4172006-01-18 09:30:29 +0000381 return error;
382}
383