David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 1 | /****************************************************************************** |
| 2 | ******************************************************************************* |
| 3 | ** |
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 5 | ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. |
| 6 | ** |
| 7 | ** This copyrighted material is made available to anyone wishing to use, |
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
| 9 | ** of the GNU General Public License v.2. |
| 10 | ** |
| 11 | ******************************************************************************* |
| 12 | ******************************************************************************/ |
| 13 | |
| 14 | #include "dlm_internal.h" |
| 15 | #include "lockspace.h" |
| 16 | #include "member.h" |
| 17 | #include "lowcomms.h" |
| 18 | #include "rcom.h" |
| 19 | #include "config.h" |
| 20 | #include "memory.h" |
| 21 | #include "recover.h" |
| 22 | #include "util.h" |
| 23 | #include "lock.h" |
| 24 | #include "dir.h" |
| 25 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 26 | /* |
| 27 | * We use the upper 16 bits of the hash value to select the directory node. |
| 28 | * Low bits are used for distribution of rsb's among hash buckets on each node. |
| 29 | * |
| 30 | * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of |
| 31 | * num_nodes to the hash value. This value in the desired range is used as an |
| 32 | * offset into the sorted list of nodeid's to give the particular nodeid. |
| 33 | */ |
| 34 | |
| 35 | int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) |
| 36 | { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 37 | uint32_t node; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 38 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 39 | if (ls->ls_num_nodes == 1) |
| 40 | return dlm_our_nodeid(); |
| 41 | else { |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 42 | node = (hash >> 16) % ls->ls_total_weight; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 43 | return ls->ls_node_array[node]; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 44 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 45 | } |
| 46 | |
| 47 | int dlm_dir_nodeid(struct dlm_rsb *r) |
| 48 | { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 49 | return r->res_dir_nodeid; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 50 | } |
| 51 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 52 | void dlm_recover_dir_nodeid(struct dlm_ls *ls) |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 53 | { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 54 | struct dlm_rsb *r; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 55 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 56 | down_read(&ls->ls_root_sem); |
| 57 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
| 58 | r->res_dir_nodeid = dlm_hash2nodeid(ls, r->res_hash); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 59 | } |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 60 | up_read(&ls->ls_root_sem); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 61 | } |
| 62 | |
| 63 | int dlm_recover_directory(struct dlm_ls *ls) |
| 64 | { |
| 65 | struct dlm_member *memb; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 66 | char *b, *last_name = NULL; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 67 | int error = -ENOMEM, last_len, nodeid, result; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 68 | uint16_t namelen; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 70 | |
| 71 | log_debug(ls, "dlm_recover_directory"); |
| 72 | |
| 73 | if (dlm_no_directory(ls)) |
| 74 | goto out_status; |
| 75 | |
David Teigland | 573c24c | 2009-11-30 16:34:43 -0600 | [diff] [blame] | 76 | last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 77 | if (!last_name) |
| 78 | goto out; |
| 79 | |
| 80 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 81 | if (memb->nodeid == dlm_our_nodeid()) |
| 82 | continue; |
| 83 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 84 | memset(last_name, 0, DLM_RESNAME_MAXLEN); |
| 85 | last_len = 0; |
| 86 | |
| 87 | for (;;) { |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 88 | int left; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 89 | error = dlm_recovery_stopped(ls); |
| 90 | if (error) |
| 91 | goto out_free; |
| 92 | |
| 93 | error = dlm_rcom_names(ls, memb->nodeid, |
| 94 | last_name, last_len); |
| 95 | if (error) |
| 96 | goto out_free; |
| 97 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 98 | cond_resched(); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 99 | |
| 100 | /* |
| 101 | * pick namelen/name pairs out of received buffer |
| 102 | */ |
| 103 | |
Al Viro | 4007685 | 2008-01-25 03:01:51 -0500 | [diff] [blame] | 104 | b = ls->ls_recover_buf->rc_buf; |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 105 | left = ls->ls_recover_buf->rc_header.h_length; |
| 106 | left -= sizeof(struct dlm_rcom); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 107 | |
| 108 | for (;;) { |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 109 | __be16 v; |
| 110 | |
| 111 | error = -EINVAL; |
| 112 | if (left < sizeof(__be16)) |
| 113 | goto out_free; |
| 114 | |
| 115 | memcpy(&v, b, sizeof(__be16)); |
| 116 | namelen = be16_to_cpu(v); |
| 117 | b += sizeof(__be16); |
| 118 | left -= sizeof(__be16); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 119 | |
| 120 | /* namelen of 0xFFFFF marks end of names for |
| 121 | this node; namelen of 0 marks end of the |
| 122 | buffer */ |
| 123 | |
| 124 | if (namelen == 0xFFFF) |
| 125 | goto done; |
| 126 | if (!namelen) |
| 127 | break; |
| 128 | |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 129 | if (namelen > left) |
| 130 | goto out_free; |
| 131 | |
| 132 | if (namelen > DLM_RESNAME_MAXLEN) |
| 133 | goto out_free; |
| 134 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 135 | error = dlm_master_lookup(ls, memb->nodeid, |
| 136 | b, namelen, |
| 137 | DLM_LU_RECOVER_DIR, |
| 138 | &nodeid, &result); |
| 139 | if (error) { |
| 140 | log_error(ls, "recover_dir lookup %d", |
| 141 | error); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 142 | goto out_free; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 143 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 144 | |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 145 | /* The name was found in rsbtbl, but the |
| 146 | * master nodeid is different from |
| 147 | * memb->nodeid which says it is the master. |
| 148 | * This should not happen. */ |
| 149 | |
| 150 | if (result == DLM_LU_MATCH && |
| 151 | nodeid != memb->nodeid) { |
| 152 | count_bad++; |
| 153 | log_error(ls, "recover_dir lookup %d " |
| 154 | "nodeid %d memb %d bad %u", |
| 155 | result, nodeid, memb->nodeid, |
| 156 | count_bad); |
| 157 | print_hex_dump_bytes("dlm_recover_dir ", |
| 158 | DUMP_PREFIX_NONE, |
| 159 | b, namelen); |
| 160 | } |
| 161 | |
| 162 | /* The name was found in rsbtbl, and the |
| 163 | * master nodeid matches memb->nodeid. */ |
| 164 | |
| 165 | if (result == DLM_LU_MATCH && |
| 166 | nodeid == memb->nodeid) { |
| 167 | count_match++; |
| 168 | } |
| 169 | |
| 170 | /* The name was not found in rsbtbl and was |
| 171 | * added with memb->nodeid as the master. */ |
| 172 | |
| 173 | if (result == DLM_LU_ADD) { |
| 174 | count_add++; |
| 175 | } |
| 176 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 177 | last_len = namelen; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 178 | memcpy(last_name, b, namelen); |
| 179 | b += namelen; |
Al Viro | cd9df1a | 2008-01-25 04:08:09 -0500 | [diff] [blame] | 180 | left -= namelen; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 181 | count++; |
| 182 | } |
| 183 | } |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 184 | done: |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 185 | ; |
| 186 | } |
| 187 | |
| 188 | out_status: |
| 189 | error = 0; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 190 | dlm_set_recover_status(ls, DLM_RS_DIR); |
| 191 | |
| 192 | log_debug(ls, "dlm_recover_directory %u in %u new", |
| 193 | count, count_add); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 194 | out_free: |
| 195 | kfree(last_name); |
| 196 | out: |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 197 | return error; |
| 198 | } |
| 199 | |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 200 | static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
| 201 | { |
| 202 | struct dlm_rsb *r; |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 203 | uint32_t hash, bucket; |
| 204 | int rv; |
| 205 | |
| 206 | hash = jhash(name, len, 0); |
| 207 | bucket = hash & (ls->ls_rsbtbl_size - 1); |
| 208 | |
| 209 | spin_lock(&ls->ls_rsbtbl[bucket].lock); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 210 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].keep, name, len, &r); |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 211 | if (rv) |
| 212 | rv = dlm_search_rsb_tree(&ls->ls_rsbtbl[bucket].toss, |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 213 | name, len, &r); |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 214 | spin_unlock(&ls->ls_rsbtbl[bucket].lock); |
| 215 | |
| 216 | if (!rv) |
| 217 | return r; |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 218 | |
| 219 | down_read(&ls->ls_root_sem); |
| 220 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
| 221 | if (len == r->res_length && !memcmp(name, r->res_name, len)) { |
| 222 | up_read(&ls->ls_root_sem); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 223 | log_debug(ls, "find_rsb_root revert to root_list %s", |
David Teigland | 7210cb7 | 2012-03-08 12:37:12 -0600 | [diff] [blame] | 224 | r->res_name); |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 225 | return r; |
| 226 | } |
| 227 | } |
| 228 | up_read(&ls->ls_root_sem); |
| 229 | return NULL; |
| 230 | } |
| 231 | |
| 232 | /* Find the rsb where we left off (or start again), then send rsb names |
| 233 | for rsb's we're master of and whose directory node matches the requesting |
| 234 | node. inbuf is the rsb name last sent, inlen is the name's length */ |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 235 | |
| 236 | void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, |
| 237 | char *outbuf, int outlen, int nodeid) |
| 238 | { |
| 239 | struct list_head *list; |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 240 | struct dlm_rsb *r; |
| 241 | int offset = 0, dir_nodeid; |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 242 | __be16 be_namelen; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 243 | |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 244 | down_read(&ls->ls_root_sem); |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 245 | |
| 246 | if (inlen > 1) { |
| 247 | r = find_rsb_root(ls, inbuf, inlen); |
| 248 | if (!r) { |
| 249 | inbuf[inlen - 1] = '\0'; |
| 250 | log_error(ls, "copy_master_names from %d start %d %s", |
| 251 | nodeid, inlen, inbuf); |
| 252 | goto out; |
| 253 | } |
| 254 | list = r->res_root_list.next; |
| 255 | } else { |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 256 | list = ls->ls_root_list.next; |
David Teigland | 85f0379 | 2008-01-16 13:02:31 -0600 | [diff] [blame] | 257 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 258 | |
| 259 | for (offset = 0; list != &ls->ls_root_list; list = list->next) { |
| 260 | r = list_entry(list, struct dlm_rsb, res_root_list); |
| 261 | if (r->res_nodeid) |
| 262 | continue; |
| 263 | |
| 264 | dir_nodeid = dlm_dir_nodeid(r); |
| 265 | if (dir_nodeid != nodeid) |
| 266 | continue; |
| 267 | |
| 268 | /* |
| 269 | * The block ends when we can't fit the following in the |
| 270 | * remaining buffer space: |
| 271 | * namelen (uint16_t) + |
| 272 | * name (r->res_length) + |
| 273 | * end-of-block record 0x0000 (uint16_t) |
| 274 | */ |
| 275 | |
| 276 | if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { |
| 277 | /* Write end-of-block record */ |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 278 | be_namelen = cpu_to_be16(0); |
| 279 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
| 280 | offset += sizeof(__be16); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 281 | ls->ls_recover_dir_sent_msg++; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 282 | goto out; |
| 283 | } |
| 284 | |
| 285 | be_namelen = cpu_to_be16(r->res_length); |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 286 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
| 287 | offset += sizeof(__be16); |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 288 | memcpy(outbuf + offset, r->res_name, r->res_length); |
| 289 | offset += r->res_length; |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 290 | ls->ls_recover_dir_sent_res++; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 291 | } |
| 292 | |
| 293 | /* |
| 294 | * If we've reached the end of the list (and there's room) write a |
| 295 | * terminating record. |
| 296 | */ |
| 297 | |
| 298 | if ((list == &ls->ls_root_list) && |
| 299 | (offset + sizeof(uint16_t) <= outlen)) { |
Harvey Harrison | cd8e467 | 2008-11-12 16:28:43 -0600 | [diff] [blame] | 300 | be_namelen = cpu_to_be16(0xFFFF); |
| 301 | memcpy(outbuf + offset, &be_namelen, sizeof(__be16)); |
| 302 | offset += sizeof(__be16); |
David Teigland | c04fecb | 2012-05-10 10:18:07 -0500 | [diff] [blame] | 303 | ls->ls_recover_dir_sent_msg++; |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 304 | } |
David Teigland | e7fd417 | 2006-01-18 09:30:29 +0000 | [diff] [blame] | 305 | out: |
| 306 | up_read(&ls->ls_root_sem); |
| 307 | } |
| 308 | |