| /****************************************************************************** |
| ******************************************************************************* |
| ** |
| ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| ** Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. |
| ** |
| ** This copyrighted material is made available to anyone wishing to use, |
| ** modify, copy, or redistribute it subject to the terms and conditions |
| ** of the GNU General Public License v.2. |
| ** |
| ******************************************************************************* |
| ******************************************************************************/ |
| |
| #include "dlm_internal.h" |
| #include "lockspace.h" |
| #include "member.h" |
| #include "lowcomms.h" |
| #include "rcom.h" |
| #include "config.h" |
| #include "memory.h" |
| #include "recover.h" |
| #include "util.h" |
| #include "lock.h" |
| #include "dir.h" |
| |
| |
| static void put_free_de(struct dlm_ls *ls, struct dlm_direntry *de) |
| { |
| spin_lock(&ls->ls_recover_list_lock); |
| list_add(&de->list, &ls->ls_recover_list); |
| spin_unlock(&ls->ls_recover_list_lock); |
| } |
| |
| static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len) |
| { |
| int found = 0; |
| struct dlm_direntry *de; |
| |
| spin_lock(&ls->ls_recover_list_lock); |
| list_for_each_entry(de, &ls->ls_recover_list, list) { |
| if (de->length == len) { |
| list_del(&de->list); |
| de->master_nodeid = 0; |
| memset(de->name, 0, len); |
| found = 1; |
| break; |
| } |
| } |
| spin_unlock(&ls->ls_recover_list_lock); |
| |
| if (!found) |
| de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_KERNEL); |
| return de; |
| } |
| |
| void dlm_clear_free_entries(struct dlm_ls *ls) |
| { |
| struct dlm_direntry *de; |
| |
| spin_lock(&ls->ls_recover_list_lock); |
| while (!list_empty(&ls->ls_recover_list)) { |
| de = list_entry(ls->ls_recover_list.next, struct dlm_direntry, |
| list); |
| list_del(&de->list); |
| kfree(de); |
| } |
| spin_unlock(&ls->ls_recover_list_lock); |
| } |
| |
| /* |
| * We use the upper 16 bits of the hash value to select the directory node. |
| * Low bits are used for distribution of rsb's among hash buckets on each node. |
| * |
| * To give the exact range wanted (0 to num_nodes-1), we apply a modulus of |
| * num_nodes to the hash value. This value in the desired range is used as an |
| * offset into the sorted list of nodeid's to give the particular nodeid. |
| */ |
| |
| int dlm_hash2nodeid(struct dlm_ls *ls, uint32_t hash) |
| { |
| struct list_head *tmp; |
| struct dlm_member *memb = NULL; |
| uint32_t node, n = 0; |
| int nodeid; |
| |
| if (ls->ls_num_nodes == 1) { |
| nodeid = dlm_our_nodeid(); |
| goto out; |
| } |
| |
| if (ls->ls_node_array) { |
| node = (hash >> 16) % ls->ls_total_weight; |
| nodeid = ls->ls_node_array[node]; |
| goto out; |
| } |
| |
| /* make_member_array() failed to kmalloc ls_node_array... */ |
| |
| node = (hash >> 16) % ls->ls_num_nodes; |
| |
| list_for_each(tmp, &ls->ls_nodes) { |
| if (n++ != node) |
| continue; |
| memb = list_entry(tmp, struct dlm_member, list); |
| break; |
| } |
| |
| DLM_ASSERT(memb , printk("num_nodes=%u n=%u node=%u\n", |
| ls->ls_num_nodes, n, node);); |
| nodeid = memb->nodeid; |
| out: |
| return nodeid; |
| } |
| |
| int dlm_dir_nodeid(struct dlm_rsb *r) |
| { |
| return dlm_hash2nodeid(r->res_ls, r->res_hash); |
| } |
| |
| static inline uint32_t dir_hash(struct dlm_ls *ls, char *name, int len) |
| { |
| uint32_t val; |
| |
| val = jhash(name, len, 0); |
| val &= (ls->ls_dirtbl_size - 1); |
| |
| return val; |
| } |
| |
| static void add_entry_to_hash(struct dlm_ls *ls, struct dlm_direntry *de) |
| { |
| uint32_t bucket; |
| |
| bucket = dir_hash(ls, de->name, de->length); |
| list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); |
| } |
| |
| static struct dlm_direntry *search_bucket(struct dlm_ls *ls, char *name, |
| int namelen, uint32_t bucket) |
| { |
| struct dlm_direntry *de; |
| |
| list_for_each_entry(de, &ls->ls_dirtbl[bucket].list, list) { |
| if (de->length == namelen && !memcmp(name, de->name, namelen)) |
| goto out; |
| } |
| de = NULL; |
| out: |
| return de; |
| } |
| |
| void dlm_dir_remove_entry(struct dlm_ls *ls, int nodeid, char *name, int namelen) |
| { |
| struct dlm_direntry *de; |
| uint32_t bucket; |
| |
| bucket = dir_hash(ls, name, namelen); |
| |
| write_lock(&ls->ls_dirtbl[bucket].lock); |
| |
| de = search_bucket(ls, name, namelen, bucket); |
| |
| if (!de) { |
| log_error(ls, "remove fr %u none", nodeid); |
| goto out; |
| } |
| |
| if (de->master_nodeid != nodeid) { |
| log_error(ls, "remove fr %u ID %u", nodeid, de->master_nodeid); |
| goto out; |
| } |
| |
| list_del(&de->list); |
| kfree(de); |
| out: |
| write_unlock(&ls->ls_dirtbl[bucket].lock); |
| } |
| |
| void dlm_dir_clear(struct dlm_ls *ls) |
| { |
| struct list_head *head; |
| struct dlm_direntry *de; |
| int i; |
| |
| DLM_ASSERT(list_empty(&ls->ls_recover_list), ); |
| |
| for (i = 0; i < ls->ls_dirtbl_size; i++) { |
| write_lock(&ls->ls_dirtbl[i].lock); |
| head = &ls->ls_dirtbl[i].list; |
| while (!list_empty(head)) { |
| de = list_entry(head->next, struct dlm_direntry, list); |
| list_del(&de->list); |
| put_free_de(ls, de); |
| } |
| write_unlock(&ls->ls_dirtbl[i].lock); |
| } |
| } |
| |
| int dlm_recover_directory(struct dlm_ls *ls) |
| { |
| struct dlm_member *memb; |
| struct dlm_direntry *de; |
| char *b, *last_name = NULL; |
| int error = -ENOMEM, last_len, count = 0; |
| uint16_t namelen; |
| |
| log_debug(ls, "dlm_recover_directory"); |
| |
| if (dlm_no_directory(ls)) |
| goto out_status; |
| |
| dlm_dir_clear(ls); |
| |
| last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_KERNEL); |
| if (!last_name) |
| goto out; |
| |
| list_for_each_entry(memb, &ls->ls_nodes, list) { |
| memset(last_name, 0, DLM_RESNAME_MAXLEN); |
| last_len = 0; |
| |
| for (;;) { |
| int left; |
| error = dlm_recovery_stopped(ls); |
| if (error) |
| goto out_free; |
| |
| error = dlm_rcom_names(ls, memb->nodeid, |
| last_name, last_len); |
| if (error) |
| goto out_free; |
| |
| schedule(); |
| |
| /* |
| * pick namelen/name pairs out of received buffer |
| */ |
| |
| b = ls->ls_recover_buf->rc_buf; |
| left = ls->ls_recover_buf->rc_header.h_length; |
| left -= sizeof(struct dlm_rcom); |
| |
| for (;;) { |
| __be16 v; |
| |
| error = -EINVAL; |
| if (left < sizeof(__be16)) |
| goto out_free; |
| |
| memcpy(&v, b, sizeof(__be16)); |
| namelen = be16_to_cpu(v); |
| b += sizeof(__be16); |
| left -= sizeof(__be16); |
| |
| /* namelen of 0xFFFFF marks end of names for |
| this node; namelen of 0 marks end of the |
| buffer */ |
| |
| if (namelen == 0xFFFF) |
| goto done; |
| if (!namelen) |
| break; |
| |
| if (namelen > left) |
| goto out_free; |
| |
| if (namelen > DLM_RESNAME_MAXLEN) |
| goto out_free; |
| |
| error = -ENOMEM; |
| de = get_free_de(ls, namelen); |
| if (!de) |
| goto out_free; |
| |
| de->master_nodeid = memb->nodeid; |
| de->length = namelen; |
| last_len = namelen; |
| memcpy(de->name, b, namelen); |
| memcpy(last_name, b, namelen); |
| b += namelen; |
| left -= namelen; |
| |
| add_entry_to_hash(ls, de); |
| count++; |
| } |
| } |
| done: |
| ; |
| } |
| |
| out_status: |
| error = 0; |
| dlm_set_recover_status(ls, DLM_RS_DIR); |
| log_debug(ls, "dlm_recover_directory %d entries", count); |
| out_free: |
| kfree(last_name); |
| out: |
| dlm_clear_free_entries(ls); |
| return error; |
| } |
| |
| static int get_entry(struct dlm_ls *ls, int nodeid, char *name, |
| int namelen, int *r_nodeid) |
| { |
| struct dlm_direntry *de, *tmp; |
| uint32_t bucket; |
| |
| bucket = dir_hash(ls, name, namelen); |
| |
| write_lock(&ls->ls_dirtbl[bucket].lock); |
| de = search_bucket(ls, name, namelen, bucket); |
| if (de) { |
| *r_nodeid = de->master_nodeid; |
| write_unlock(&ls->ls_dirtbl[bucket].lock); |
| if (*r_nodeid == nodeid) |
| return -EEXIST; |
| return 0; |
| } |
| |
| write_unlock(&ls->ls_dirtbl[bucket].lock); |
| |
| de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_KERNEL); |
| if (!de) |
| return -ENOMEM; |
| |
| de->master_nodeid = nodeid; |
| de->length = namelen; |
| memcpy(de->name, name, namelen); |
| |
| write_lock(&ls->ls_dirtbl[bucket].lock); |
| tmp = search_bucket(ls, name, namelen, bucket); |
| if (tmp) { |
| kfree(de); |
| de = tmp; |
| } else { |
| list_add_tail(&de->list, &ls->ls_dirtbl[bucket].list); |
| } |
| *r_nodeid = de->master_nodeid; |
| write_unlock(&ls->ls_dirtbl[bucket].lock); |
| return 0; |
| } |
| |
/*
 * Public entry point for a directory lookup: forwards all arguments to
 * get_entry() and returns its result unchanged.
 */
int dlm_dir_lookup(struct dlm_ls *ls, int nodeid, char *name, int namelen,
		   int *r_nodeid)
{
	int rv;

	rv = get_entry(ls, nodeid, name, namelen, r_nodeid);
	return rv;
}
| |
| static struct dlm_rsb *find_rsb_root(struct dlm_ls *ls, char *name, int len) |
| { |
| struct dlm_rsb *r; |
| |
| down_read(&ls->ls_root_sem); |
| list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
| if (len == r->res_length && !memcmp(name, r->res_name, len)) { |
| up_read(&ls->ls_root_sem); |
| return r; |
| } |
| } |
| up_read(&ls->ls_root_sem); |
| return NULL; |
| } |
| |
| /* Find the rsb where we left off (or start again), then send rsb names |
| for rsb's we're master of and whose directory node matches the requesting |
| node. inbuf is the rsb name last sent, inlen is the name's length */ |
| |
| void dlm_copy_master_names(struct dlm_ls *ls, char *inbuf, int inlen, |
| char *outbuf, int outlen, int nodeid) |
| { |
| struct list_head *list; |
| struct dlm_rsb *r; |
| int offset = 0, dir_nodeid; |
| uint16_t be_namelen; |
| |
| down_read(&ls->ls_root_sem); |
| |
| if (inlen > 1) { |
| r = find_rsb_root(ls, inbuf, inlen); |
| if (!r) { |
| inbuf[inlen - 1] = '\0'; |
| log_error(ls, "copy_master_names from %d start %d %s", |
| nodeid, inlen, inbuf); |
| goto out; |
| } |
| list = r->res_root_list.next; |
| } else { |
| list = ls->ls_root_list.next; |
| } |
| |
| for (offset = 0; list != &ls->ls_root_list; list = list->next) { |
| r = list_entry(list, struct dlm_rsb, res_root_list); |
| if (r->res_nodeid) |
| continue; |
| |
| dir_nodeid = dlm_dir_nodeid(r); |
| if (dir_nodeid != nodeid) |
| continue; |
| |
| /* |
| * The block ends when we can't fit the following in the |
| * remaining buffer space: |
| * namelen (uint16_t) + |
| * name (r->res_length) + |
| * end-of-block record 0x0000 (uint16_t) |
| */ |
| |
| if (offset + sizeof(uint16_t)*2 + r->res_length > outlen) { |
| /* Write end-of-block record */ |
| be_namelen = 0; |
| memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t)); |
| offset += sizeof(uint16_t); |
| goto out; |
| } |
| |
| be_namelen = cpu_to_be16(r->res_length); |
| memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t)); |
| offset += sizeof(uint16_t); |
| memcpy(outbuf + offset, r->res_name, r->res_length); |
| offset += r->res_length; |
| } |
| |
| /* |
| * If we've reached the end of the list (and there's room) write a |
| * terminating record. |
| */ |
| |
| if ((list == &ls->ls_root_list) && |
| (offset + sizeof(uint16_t) <= outlen)) { |
| be_namelen = 0xFFFF; |
| memcpy(outbuf + offset, &be_namelen, sizeof(uint16_t)); |
| offset += sizeof(uint16_t); |
| } |
| |
| out: |
| up_read(&ls->ls_root_sem); |
| } |
| |