blob: 278a75cda4463006d4b2c2e85d75c7728d5b4526 [file] [log] [blame]
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
**
**  This copyrighted material is made available to anyone wishing to use,
**  modify, copy, or redistribute it subject to the terms and conditions
**  of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
13
14#include "dlm_internal.h"
15#include "lockspace.h"
16#include "member.h"
17#include "lowcomms.h"
18#include "rcom.h"
19#include "config.h"
20#include "memory.h"
21#include "recover.h"
22#include "util.h"
23#include "lock.h"
24#include "dir.h"
25
David Teiglande7fd4172006-01-18 09:30:29 +000026/*
27 * We use the upper 16 bits of the hash value to select the directory node.
28 * Low bits are used for distribution of rsb's among hash buckets on each node.
29 *
30 * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of
31 * num_nodes to the hash value. This value in the desired range is used as an
32 * offset into the sorted list of nodeid's to give the particular nodeid.
33 */
34
35int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash)
36{
David Teiglandc04fecb2012-05-10 10:18:07 -050037 uint32_t node;
David Teiglande7fd4172006-01-18 09:30:29 +000038
David Teiglandc04fecb2012-05-10 10:18:07 -050039 if (ls->ls_num_nodes == 1)
40 return dlm_our_nodeid();
41 else {
David Teiglande7fd4172006-01-18 09:30:29 +000042 node = (hash >> 16) % ls->ls_total_weight;
David Teiglandc04fecb2012-05-10 10:18:07 -050043 return ls->ls_node_array[node];
David Teiglande7fd4172006-01-18 09:30:29 +000044 }
David Teiglande7fd4172006-01-18 09:30:29 +000045}
46
47int dlm_dir_nodeid(struct dlm_rsb *r)
48{
David Teiglandc04fecb2012-05-10 10:18:07 -050049 return r->res_dir_nodeid;
David Teiglande7fd4172006-01-18 09:30:29 +000050}
51
David Teiglandc04fecb2012-05-10 10:18:07 -050052void dlm_recover_dir_nodeid(struct dlm_ls *ls)
David Teiglande7fd4172006-01-18 09:30:29 +000053{
David Teiglandc04fecb2012-05-10 10:18:07 -050054 struct dlm_rsb *r;
David Teiglande7fd4172006-01-18 09:30:29 +000055
David Teiglandc04fecb2012-05-10 10:18:07 -050056 down_read(&ls->ls_root_sem);
57 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
58 r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash);
David Teiglande7fd4172006-01-18 09:30:29 +000059 }
David Teiglandc04fecb2012-05-10 10:18:07 -050060 up_read(&ls->ls_root_sem);
David Teiglande7fd4172006-01-18 09:30:29 +000061}
62
63int dlm_recover_directory(struct dlm_ls *ls)
64{
65 struct dlm_member *memb;
David Teiglande7fd4172006-01-18 09:30:29 +000066 char *b, *last_name = NULL;
David Teiglandc04fecb2012-05-10 10:18:07 -050067 int error = -ENOMEM, last_len, nodeid, result;
David Teiglande7fd4172006-01-18 09:30:29 +000068 uint16_t namelen;
David Teiglandc04fecb2012-05-10 10:18:07 -050069 unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0;
David Teiglande7fd4172006-01-18 09:30:29 +000070
71 log_debug(ls, "dlm_recover_directory");
72
73 if (dlm_no_directory(ls))
74 goto out_status;
75
David Teigland573c24c2009-11-30 16:34:43 -060076 last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
David Teiglande7fd4172006-01-18 09:30:29 +000077 if (!last_name)
78 goto out;
79
80 list_for_each_entry(memb, &ls->ls_nodes, list) {
David Teiglandc04fecb2012-05-10 10:18:07 -050081 if (memb->nodeid == dlm_our_nodeid())
82 continue;
83
David Teiglande7fd4172006-01-18 09:30:29 +000084 memset(last_name, 0, DLM_RESNAME_MAXLEN);
85 last_len = 0;
86
87 for (;;) {
Al Virocd9df1a2008-01-25 04:08:09 -050088 int left;
David Teiglande7fd4172006-01-18 09:30:29 +000089 error = dlm_recovery_stopped(ls);
90 if (error)
91 goto out_free;
92
93 error = dlm_rcom_names(ls, memb->nodeid,
94 last_name, last_len);
95 if (error)
96 goto out_free;
97
David Teiglandc04fecb2012-05-10 10:18:07 -050098 cond_resched();
David Teiglande7fd4172006-01-18 09:30:29 +000099
100 /*
101 * pick namelen/name pairs out of received buffer
102 */
103
Al Viro40076852008-01-25 03:01:51 -0500104 b = ls->ls_recover_buf->rc_buf;
Al Virocd9df1a2008-01-25 04:08:09 -0500105 left = ls->ls_recover_buf->rc_header.h_length;
106 left -= sizeof(struct dlm_rcom);
David Teiglande7fd4172006-01-18 09:30:29 +0000107
108 for (;;) {
Al Virocd9df1a2008-01-25 04:08:09 -0500109 __be16 v;
110
111 error = -EINVAL;
112 if (left < sizeof(__be16))
113 goto out_free;
114
115 memcpy(&v, b, sizeof(__be16));
116 namelen = be16_to_cpu(v);
117 b += sizeof(__be16);
118 left -= sizeof(__be16);
David Teiglande7fd4172006-01-18 09:30:29 +0000119
120 /* namelen of 0xFFFFF marks end of names for
121 this node; namelen of 0 marks end of the
122 buffer */
123
124 if (namelen == 0xFFFF)
125 goto done;
126 if (!namelen)
127 break;
128
Al Virocd9df1a2008-01-25 04:08:09 -0500129 if (namelen > left)
130 goto out_free;
131
132 if (namelen > DLM_RESNAME_MAXLEN)
133 goto out_free;
134
David Teiglandc04fecb2012-05-10 10:18:07 -0500135 error = dlm_master_lookup(ls, memb->nodeid,
136 b, namelen,
137 DLM_LU_RECOVER_DIR,
138 &nodeid, &result);
139 if (error) {
140 log_error(ls, "recover_dir lookup %d",
141 error);
David Teiglande7fd4172006-01-18 09:30:29 +0000142 goto out_free;
David Teiglandc04fecb2012-05-10 10:18:07 -0500143 }
David Teiglande7fd4172006-01-18 09:30:29 +0000144
David Teiglandc04fecb2012-05-10 10:18:07 -0500145 /* The name was found in rsbtbl, but the
146 * master nodeid is different from
147 * memb->nodeid which says it is the master.
148 * This should not happen. */
149
150 if (result == DLM_LU_MATCH &&
151 nodeid != memb->nodeid) {
152 count_bad++;
153 log_error(ls, "recover_dir lookup %d "
154 "nodeid %d memb %d bad %u",
155 result, nodeid, memb->nodeid,
156 count_bad);
157 print_hex_dump_bytes("dlm_recover_dir ",
158 DUMP_PREFIX_NONE,
159 b, namelen);
160 }
161
162 /* The name was found in rsbtbl, and the
163 * master nodeid matches memb->nodeid. */
164
165 if (result == DLM_LU_MATCH &&
166 nodeid == memb->nodeid) {
167 count_match++;
168 }
169
170 /* The name was not found in rsbtbl and was
171 * added with memb->nodeid as the master. */
172
173 if (result == DLM_LU_ADD) {
174 count_add++;
175 }
176
David Teiglande7fd4172006-01-18 09:30:29 +0000177 last_len = namelen;
David Teiglande7fd4172006-01-18 09:30:29 +0000178 memcpy(last_name, b, namelen);
179 b += namelen;
Al Virocd9df1a2008-01-25 04:08:09 -0500180 left -= namelen;
David Teiglande7fd4172006-01-18 09:30:29 +0000181 count++;
182 }
183 }
David Teiglandc04fecb2012-05-10 10:18:07 -0500184 done:
David Teiglande7fd4172006-01-18 09:30:29 +0000185 ;
186 }
187
188 out_status:
189 error = 0;
David Teiglandc04fecb2012-05-10 10:18:07 -0500190 dlm_set_recover_status(ls, DLM_RS_DIR);
191
192 log_debug(ls, "dlm_recover_directory %u in %u new",
193 count, count_add);
David Teiglande7fd4172006-01-18 09:30:29 +0000194 out_free:
195 kfree(last_name);
196 out:
David Teiglande7fd4172006-01-18 09:30:29 +0000197 return error;
198}
199
David Teigland85f03792008-01-16 13:02:31 -0600200static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len)
201{
202 struct dlm_rsb *r;
David Teigland7210cb72012-03-08 12:37:12 -0600203 uint32_t hash, bucket;
204 int rv;
205
206 hash = jhash(name, len, 0);
207 bucket = hash & (ls->ls_rsbtbl_size - 1);
208
209 spin_lock(&ls->ls_rsbtbl[bucket].lock);
David Teiglandc04fecb2012-05-10 10:18:07 -0500210 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r);
David Teigland7210cb72012-03-08 12:37:12 -0600211 if (rv)
212 rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss,
David Teiglandc04fecb2012-05-10 10:18:07 -0500213 name, len, &r);
David Teigland7210cb72012-03-08 12:37:12 -0600214 spin_unlock(&ls->ls_rsbtbl[bucket].lock);
215
216 if (!rv)
217 return r;
David Teigland85f03792008-01-16 13:02:31 -0600218
219 down_read(&ls->ls_root_sem);
220 list_for_each_entry(r, &ls->ls_root_list, res_root_list) {
221 if (len == r->res_length && !memcmp(name, r->res_name, len)) {
222 up_read(&ls->ls_root_sem);
David Teiglandc04fecb2012-05-10 10:18:07 -0500223 log_debug(ls, "find_rsb_root revert to root_list %s",
David Teigland7210cb72012-03-08 12:37:12 -0600224 r->res_name);
David Teigland85f03792008-01-16 13:02:31 -0600225 return r;
226 }
227 }
228 up_read(&ls->ls_root_sem);
229 return NULL;
230}
231
232/* Find the rsb where we left off (or start again), then send rsb names
233 for rsb's we're master of and whose directory node matches the requesting
234 node. inbuf is the rsb name last sent, inlen is the name's length */
David Teiglande7fd4172006-01-18 09:30:29 +0000235
236void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen,
237 char *outbuf, int outlen, int nodeid)
238{
239 struct list_head *list;
David Teigland85f03792008-01-16 13:02:31 -0600240 struct dlm_rsb *r;
241 int offset = 0, dir_nodeid;
Harvey Harrisoncd8e4672008-11-12 16:28:43 -0600242 __be16 be_namelen;
David Teiglande7fd4172006-01-18 09:30:29 +0000243
David Teiglande7fd4172006-01-18 09:30:29 +0000244 down_read(&ls->ls_root_sem);
David Teigland85f03792008-01-16 13:02:31 -0600245
246 if (inlen > 1) {
247 r = find_rsb_root(ls, inbuf, inlen);
248 if (!r) {
249 inbuf[inlen - 1] = '\0';
250 log_error(ls, "copy_master_names from %d start %d %s",
251 nodeid, inlen, inbuf);
252 goto out;
253 }
254 list = r->res_root_list.next;
255 } else {
David Teiglande7fd4172006-01-18 09:30:29 +0000256 list = ls->ls_root_list.next;
David Teigland85f03792008-01-16 13:02:31 -0600257 }
David Teiglande7fd4172006-01-18 09:30:29 +0000258
259 for (offset = 0; list != &ls->ls_root_list; list = list->next) {
260 r = list_entry(list, struct dlm_rsb, res_root_list);
261 if (r->res_nodeid)
262 continue;
263
264 dir_nodeid = dlm_dir_nodeid(r);
265 if (dir_nodeid != nodeid)
266 continue;
267
268 /*
269 * The block ends when we can't fit the following in the
270 * remaining buffer space:
271 * namelen (uint16_t) +
272 * name (r->res_length) +
273 * end-of-block record 0x0000 (uint16_t)
274 */
275
276 if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) {
277 /* Write end-of-block record */
Harvey Harrisoncd8e4672008-11-12 16:28:43 -0600278 be_namelen = cpu_to_be16(0);
279 memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
280 offset += sizeof(__be16);
David Teiglandc04fecb2012-05-10 10:18:07 -0500281 ls->ls_recover_dir_sent_msg++;
David Teiglande7fd4172006-01-18 09:30:29 +0000282 goto out;
283 }
284
285 be_namelen = cpu_to_be16(r->res_length);
Harvey Harrisoncd8e4672008-11-12 16:28:43 -0600286 memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
287 offset += sizeof(__be16);
David Teiglande7fd4172006-01-18 09:30:29 +0000288 memcpy(outbuf + offset, r->res_name, r->res_length);
289 offset += r->res_length;
David Teiglandc04fecb2012-05-10 10:18:07 -0500290 ls->ls_recover_dir_sent_res++;
David Teiglande7fd4172006-01-18 09:30:29 +0000291 }
292
293 /*
294 * If we've reached the end of the list (and there's room) write a
295 * terminating record.
296 */
297
298 if ((list == &ls->ls_root_list) &&
299 (offset + sizeof(uint16_t) <= outlen)) {
Harvey Harrisoncd8e4672008-11-12 16:28:43 -0600300 be_namelen = cpu_to_be16(0xFFFF);
301 memcpy(outbuf + offset, &be_namelen, sizeof(__be16));
302 offset += sizeof(__be16);
David Teiglandc04fecb2012-05-10 10:18:07 -0500303 ls->ls_recover_dir_sent_msg++;
David Teiglande7fd4172006-01-18 09:30:29 +0000304 }
David Teiglande7fd4172006-01-18 09:30:29 +0000305 out:
306 up_read(&ls->ls_root_sem);
307}
308