blob: b73c34375f604b2565d64e84756d102bd76a884d [file] [log] [blame]
Andy Adamson16b374c2010-10-20 00:18:04 -04001/*
2 * Device operations for the pnfs nfs4 file layout driver.
3 *
4 * Copyright (c) 2002
5 * The Regents of the University of Michigan
6 * All Rights Reserved
7 *
8 * Dean Hildebrand <dhildebz@umich.edu>
9 * Garth Goodson <Garth.Goodson@netapp.com>
10 *
11 * Permission is granted to use, copy, create derivative works, and
12 * redistribute this software and such derivative works for any purpose,
13 * so long as the name of the University of Michigan is not used in
14 * any advertising or publicity pertaining to the use or distribution
15 * of this software without specific, written prior authorization. If
16 * the above copyright notice or any other identification of the
17 * University of Michigan is included in any copy of any portion of
18 * this software, then the disclaimer below must also be included.
19 *
20 * This software is provided as is, without representation or warranty
21 * of any kind either express or implied, including without limitation
22 * the implied warranties of merchantability, fitness for a particular
23 * purpose, or noninfringement. The Regents of the University of
24 * Michigan shall not be liable for any damages, including special,
25 * indirect, incidental, or consequential damages, with respect to any
26 * claim arising out of or in connection with the use of the software,
27 * even if it has been or is hereafter advised of the possibility of
28 * such damages.
29 */
30
31#include <linux/nfs_fs.h>
32#include <linux/vmalloc.h>
33
34#include "internal.h"
35#include "nfs4filelayout.h"
36
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38
/*
 * Cache of known data servers.
 *
 * One data server may be referenced by several device ids, so every
 * nfs4_pnfs_ds entry carries a reference count:
 *  - it starts at 1 when the entry is allocated,
 *  - it is bumped when a device id maps a data server already cached,
 *  - it is dropped when a device id is removed from the cache.
 *
 * nfs4_ds_cache_lock is taken around lookups, insertions and removals
 * on nfs4_data_server_cache in this file.
 */
DEFINE_SPINLOCK(nfs4_ds_cache_lock);
static LIST_HEAD(nfs4_data_server_cache);
50
51/* Debug routines */
52void
53print_ds(struct nfs4_pnfs_ds *ds)
54{
55 if (ds == NULL) {
56 printk("%s NULL device\n", __func__);
57 return;
58 }
59 printk(" ip_addr %x port %hu\n"
60 " ref count %d\n"
61 " client %p\n"
62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66}
67
68void
69print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr)
70{
71 int i;
72
73 ifdebug(FACILITY) {
74 printk("%s dsaddr->ds_num %d\n", __func__,
75 dsaddr->ds_num);
76 for (i = 0; i < dsaddr->ds_num; i++)
77 print_ds(dsaddr->ds_list[i]);
78 }
79}
80
81void print_deviceid(struct nfs4_deviceid *id)
82{
83 u32 *p = (u32 *)id;
84
85 dprintk("%s: device id= [%x%x%x%x]\n", __func__,
86 p[0], p[1], p[2], p[3]);
87}
88
89/* nfs4_ds_cache_lock is held */
90static struct nfs4_pnfs_ds *
91_data_server_lookup_locked(u32 ip_addr, u32 port)
92{
93 struct nfs4_pnfs_ds *ds;
94
95 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n",
96 ntohl(ip_addr), ntohs(port));
97
98 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
99 if (ds->ds_ip_addr == ip_addr &&
100 ds->ds_port == port) {
101 return ds;
102 }
103 }
104 return NULL;
105}
106
107static void
108destroy_ds(struct nfs4_pnfs_ds *ds)
109{
110 dprintk("--> %s\n", __func__);
111 ifdebug(FACILITY)
112 print_ds(ds);
113
114 if (ds->ds_clp)
115 nfs_put_client(ds->ds_clp);
116 kfree(ds);
117}
118
119static void
120nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
121{
122 struct nfs4_pnfs_ds *ds;
123 int i;
124
125 print_deviceid(&dsaddr->deviceid.de_id);
126
127 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i];
129 if (ds != NULL) {
130 if (atomic_dec_and_lock(&ds->ds_count,
131 &nfs4_ds_cache_lock)) {
132 list_del_init(&ds->ds_node);
133 spin_unlock(&nfs4_ds_cache_lock);
134 destroy_ds(ds);
135 }
136 }
137 }
138 kfree(dsaddr->stripe_indices);
139 kfree(dsaddr);
140}
141
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{
154 struct nfs4_pnfs_ds *tmp_ds, *ds;
155
156 ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL);
157 if (!ds)
158 goto out;
159
160 spin_lock(&nfs4_ds_cache_lock);
161 tmp_ds = _data_server_lookup_locked(ip_addr, port);
162 if (tmp_ds == NULL) {
163 ds->ds_ip_addr = ip_addr;
164 ds->ds_port = port;
165 atomic_set(&ds->ds_count, 1);
166 INIT_LIST_HEAD(&ds->ds_node);
167 ds->ds_clp = NULL;
168 list_add(&ds->ds_node, &nfs4_data_server_cache);
169 dprintk("%s add new data server ip 0x%x\n", __func__,
170 ds->ds_ip_addr);
171 } else {
172 kfree(ds);
173 atomic_inc(&tmp_ds->ds_count);
174 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n",
175 __func__, tmp_ds->ds_ip_addr,
176 atomic_read(&tmp_ds->ds_count));
177 ds = tmp_ds;
178 }
179 spin_unlock(&nfs4_ds_cache_lock);
180out:
181 return ds;
182}
183
184/*
185 * Currently only support ipv4, and one multi-path address.
186 */
187static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode)
189{
190 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf;
192 const char *ipend, *pstr;
193 u32 ip_addr, port;
194 int nlen, rlen, i;
195 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp;
197
198 /* r_netid */
199 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202
203 /* r_addr */
204 rlen = be32_to_cpup(p++);
205 r_addr = p;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208
209 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err;
213 }
214
215 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) {
Jesper Juhlad3d2ee2011-01-17 18:41:50 +0000217 dprintk("%s: Invalid address, length %d\n", __func__,
Andy Adamson16b374c2010-10-20 00:18:04 -0400218 rlen);
219 goto out_err;
220 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL);
Stanislav Fomichevb9f81052011-02-05 23:13:01 +0000222 if (!buf) {
223 dprintk("%s: Not enough memory\n", __func__);
224 goto out_err;
225 }
Andy Adamson16b374c2010-10-20 00:18:04 -0400226 buf[rlen] = '\0';
227 memcpy(buf, r_addr, rlen);
228
229 /* replace the port dots with dashes for the in4_pton() delimiter*/
230 for (i = 0; i < 2; i++) {
231 char *res = strrchr(buf, '.');
Jesper Juhlad3d2ee2011-01-17 18:41:50 +0000232 if (!res) {
233 dprintk("%s: Failed finding expected dots in port\n",
234 __func__);
235 goto out_free;
236 }
Andy Adamson16b374c2010-10-20 00:18:04 -0400237 *res = '-';
238 }
239
240 /* Currently only support ipv4 address */
241 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) {
242 dprintk("%s: Only ipv4 addresses supported\n", __func__);
243 goto out_free;
244 }
245
246 /* port */
247 pstr = ipend;
248 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
249 port = htons((tmp[0] << 8) | (tmp[1]));
250
251 ds = nfs4_pnfs_ds_add(inode, ip_addr, port);
Jesper Juhlad3d2ee2011-01-17 18:41:50 +0000252 dprintk("%s: Decoded address and port %s\n", __func__, buf);
Andy Adamson16b374c2010-10-20 00:18:04 -0400253out_free:
254 kfree(buf);
255out_err:
256 return ds;
257}
258
259/* Decode opaque device data and return the result */
260static struct nfs4_file_layout_dsaddr*
261decode_device(struct inode *ino, struct pnfs_device *pdev)
262{
263 int i, dummy;
264 u32 cnt, num;
265 u8 *indexp;
266 __be32 *p = (__be32 *)pdev->area, *indicesp;
267 struct nfs4_file_layout_dsaddr *dsaddr;
268
269 /* Get the stripe count (number of stripe index) */
270 cnt = be32_to_cpup(p++);
271 dprintk("%s stripe count %d\n", __func__, cnt);
272 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
273 printk(KERN_WARNING "%s: stripe count %d greater than "
274 "supported maximum %d\n", __func__,
275 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
276 goto out_err;
277 }
278
279 /* Check the multipath list count */
280 indicesp = p;
281 p += XDR_QUADLEN(cnt << 2);
282 num = be32_to_cpup(p++);
283 dprintk("%s ds_num %u\n", __func__, num);
284 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
285 printk(KERN_WARNING "%s: multipath count %d greater than "
286 "supported maximum %d\n", __func__,
287 num, NFS4_PNFS_MAX_MULTI_CNT);
288 goto out_err;
289 }
290 dsaddr = kzalloc(sizeof(*dsaddr) +
291 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
292 GFP_KERNEL);
293 if (!dsaddr)
294 goto out_err;
295
296 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
297 if (!dsaddr->stripe_indices)
298 goto out_err_free;
299
300 dsaddr->stripe_count = cnt;
301 dsaddr->ds_num = num;
302
303 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id));
304
305 /* Go back an read stripe indices */
306 p = indicesp;
307 indexp = &dsaddr->stripe_indices[0];
308 for (i = 0; i < dsaddr->stripe_count; i++) {
309 *indexp = be32_to_cpup(p++);
310 if (*indexp >= num)
311 goto out_err_free;
312 indexp++;
313 }
314 /* Skip already read multipath list count */
315 p++;
316
317 for (i = 0; i < dsaddr->ds_num; i++) {
318 int j;
319
320 dummy = be32_to_cpup(p++); /* multipath count */
321 if (dummy > 1) {
322 printk(KERN_WARNING
323 "%s: Multipath count %d not supported, "
324 "skipping all greater than 1\n", __func__,
325 dummy);
326 }
327 for (j = 0; j < dummy; j++) {
328 if (j == 0) {
329 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino);
330 if (dsaddr->ds_list[i] == NULL)
331 goto out_err_free;
332 } else {
333 u32 len;
334 /* skip extra multipath */
335 len = be32_to_cpup(p++);
336 p += XDR_QUADLEN(len);
337 len = be32_to_cpup(p++);
338 p += XDR_QUADLEN(len);
339 continue;
340 }
341 }
342 }
343 return dsaddr;
344
345out_err_free:
346 nfs4_fl_free_deviceid(dsaddr);
347out_err:
348 dprintk("%s ERROR: returning NULL\n", __func__);
349 return NULL;
350}
351
352/*
353 * Decode the opaque device specified in 'dev'
354 * and add it to the list of available devices.
355 * If the deviceid is already cached, nfs4_add_deviceid will return
356 * a pointer to the cached struct and throw away the new.
357 */
358static struct nfs4_file_layout_dsaddr*
359decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
360{
361 struct nfs4_file_layout_dsaddr *dsaddr;
362 struct pnfs_deviceid_node *d;
363
364 dsaddr = decode_device(inode, dev);
365 if (!dsaddr) {
366 printk(KERN_WARNING "%s: Could not decode or add device\n",
367 __func__);
368 return NULL;
369 }
370
371 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache,
372 &dsaddr->deviceid);
373
374 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
375}
376
377/*
378 * Retrieve the information for dev_id, add it to the list
379 * of available devices, and return it.
380 */
381struct nfs4_file_layout_dsaddr *
382get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
383{
384 struct pnfs_device *pdev = NULL;
385 u32 max_resp_sz;
386 int max_pages;
387 struct page **pages = NULL;
388 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
389 int rc, i;
390 struct nfs_server *server = NFS_SERVER(inode);
391
392 /*
393 * Use the session max response size as the basis for setting
394 * GETDEVICEINFO's maxcount
395 */
396 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
397 max_pages = max_resp_sz >> PAGE_SHIFT;
398 dprintk("%s inode %p max_resp_sz %u max_pages %d\n",
399 __func__, inode, max_resp_sz, max_pages);
400
401 pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL);
402 if (pdev == NULL)
403 return NULL;
404
405 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
406 if (pages == NULL) {
407 kfree(pdev);
408 return NULL;
409 }
410 for (i = 0; i < max_pages; i++) {
411 pages[i] = alloc_page(GFP_KERNEL);
412 if (!pages[i])
413 goto out_free;
414 }
415
416 /* set pdev->area */
417 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
418 if (!pdev->area)
419 goto out_free;
420
421 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
422 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
423 pdev->pages = pages;
424 pdev->pgbase = 0;
425 pdev->pglen = PAGE_SIZE * max_pages;
426 pdev->mincount = 0;
427
428 rc = nfs4_proc_getdeviceinfo(server, pdev);
429 dprintk("%s getdevice info returns %d\n", __func__, rc);
430 if (rc)
431 goto out_free;
432
433 /*
434 * Found new device, need to decode it and then add it to the
435 * list of known devices for this mountpoint.
436 */
437 dsaddr = decode_and_add_device(inode, pdev);
438out_free:
439 if (pdev->area != NULL)
440 vunmap(pdev->area);
441 for (i = 0; i < max_pages; i++)
442 __free_page(pages[i]);
443 kfree(pages);
444 kfree(pdev);
445 dprintk("<-- %s dsaddr %p\n", __func__, dsaddr);
446 return dsaddr;
447}
448
449struct nfs4_file_layout_dsaddr *
450nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id)
451{
452 struct pnfs_deviceid_node *d;
453
454 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id);
455 return (d == NULL) ? NULL :
456 container_of(d, struct nfs4_file_layout_dsaddr, deviceid);
457}